author    Daenney <daenney@users.noreply.github.com>  2024-06-12 14:21:34 +0200
committer GitHub <noreply@github.com>  2024-06-12 13:21:34 +0100
commit    978b4176f1a31a497aaadd33f21659b318832c95 (patch)
tree      8ab36617b993a457af5d2975bedaa63a57031ff3 /vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend
parent    [bugfix] Correct Swagger path for poll voting (#2996) (diff)
download  gotosocial-978b4176f1a31a497aaadd33f21659b318832c95.tar.xz
[chore] Upgrade wasm-sqlite to v0.16.2 (#2997)
Diffstat (limited to 'vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend')
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go | 28
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go | 16
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go | 30
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go | 29
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go | 119
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go | 438
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go | 136
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go | 6
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go | 379
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go | 59
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go | 17
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go | 51
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go | 10
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go | 12
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go | 140
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go | 44
16 files changed, 775 insertions, 739 deletions
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go
index 81c6a6b62..8e9571b20 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go
@@ -43,7 +43,7 @@ type ExecutableContextT[Instr any] struct {
labelPositionPool wazevoapi.Pool[LabelPosition[Instr]]
NextLabel Label
// LabelPositions maps a label to the instructions of the region which the label represents.
- LabelPositions map[Label]*LabelPosition[Instr]
+ LabelPositions []*LabelPosition[Instr]
OrderedBlockLabels []*LabelPosition[Instr]
// PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock.
@@ -67,7 +67,6 @@ func NewExecutableContextT[Instr any](
setNext: setNext,
setPrev: setPrev,
labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]),
- LabelPositions: make(map[Label]*LabelPosition[Instr]),
NextLabel: LabelInvalid,
}
}
@@ -97,11 +96,7 @@ func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) {
end := e.allocateNop0()
e.PerBlockHead, e.PerBlockEnd = end, end
- labelPos, ok := e.LabelPositions[l]
- if !ok {
- labelPos = e.AllocateLabelPosition(l)
- e.LabelPositions[l] = labelPos
- }
+ labelPos := e.GetOrAllocateLabelPosition(l)
e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos)
labelPos.Begin, labelPos.End = end, end
labelPos.SB = blk
@@ -146,8 +141,8 @@ func (e *ExecutableContextT[T]) FlushPendingInstructions() {
func (e *ExecutableContextT[T]) Reset() {
e.labelPositionPool.Reset()
e.InstructionPool.Reset()
- for l := Label(0); l <= e.NextLabel; l++ {
- delete(e.LabelPositions, l)
+ for i := range e.LabelPositions {
+ e.LabelPositions[i] = nil
}
e.PendingInstructions = e.PendingInstructions[:0]
e.OrderedBlockLabels = e.OrderedBlockLabels[:0]
@@ -163,10 +158,17 @@ func (e *ExecutableContextT[T]) AllocateLabel() Label {
return e.NextLabel
}
-func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] {
- l := e.labelPositionPool.Allocate()
- l.L = la
- return l
+func (e *ExecutableContextT[T]) GetOrAllocateLabelPosition(l Label) *LabelPosition[T] {
+ if len(e.LabelPositions) <= int(l) {
+ e.LabelPositions = append(e.LabelPositions, make([]*LabelPosition[T], int(l)+1-len(e.LabelPositions))...)
+ }
+ ret := e.LabelPositions[l]
+ if ret == nil {
+ ret = e.labelPositionPool.Allocate()
+ ret.L = l
+ e.LabelPositions[l] = ret
+ }
+ return ret
}
func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label {
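
Aside: the executable_context.go hunks above swap the map keyed by Label for a slice indexed by the label's integer value, growing it on demand and allocating entries lazily. A minimal, self-contained sketch of that get-or-allocate pattern (simplified types, no wazevoapi.Pool; the names here are illustrative only):

package main

import "fmt"

type Label int

type LabelPosition struct{ L Label }

// positions is a dense slice indexed by Label; a nil entry means the label
// has no position allocated yet.
var positions []*LabelPosition

// getOrAllocate grows the slice just enough to cover l, then lazily creates
// the entry, mirroring GetOrAllocateLabelPosition in the diff.
func getOrAllocate(l Label) *LabelPosition {
	if len(positions) <= int(l) {
		positions = append(positions, make([]*LabelPosition, int(l)+1-len(positions))...)
	}
	ret := positions[l]
	if ret == nil {
		ret = &LabelPosition{L: l}
		positions[l] = ret
	}
	return ret
}

func main() {
	p := getOrAllocate(5)
	fmt.Println(p.L, len(positions)) // 5 6
}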
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
index 310ad2203..61ae6f406 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
@@ -1906,8 +1906,10 @@ func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) {
func (m *machine) Format() string {
ectx := m.ectx
begins := map[*instruction]backend.Label{}
- for l, pos := range ectx.LabelPositions {
- begins[pos.Begin] = l
+ for _, pos := range ectx.LabelPositions {
+ if pos != nil {
+ begins[pos.Begin] = pos.L
+ }
}
irBlocks := map[backend.Label]ssa.BasicBlockID{}
@@ -1950,7 +1952,10 @@ func (m *machine) encodeWithoutSSA(root *instruction) {
offset := int64(len(*bufPtr))
if cur.kind == nop0 {
l := cur.nop0Label()
- if pos, ok := ectx.LabelPositions[l]; ok {
+ if int(l) >= len(ectx.LabelPositions) {
+ continue
+ }
+ if pos := ectx.LabelPositions[l]; pos != nil {
pos.BinaryOffset = offset
}
}
@@ -2005,7 +2010,7 @@ func (m *machine) Encode(ctx context.Context) (err error) {
switch cur.kind {
case nop0:
l := cur.nop0Label()
- if pos, ok := ectx.LabelPositions[l]; ok {
+ if pos := ectx.LabelPositions[l]; pos != nil {
pos.BinaryOffset = offset
}
case sourceOffsetInfo:
@@ -2165,8 +2170,7 @@ func (m *machine) allocateBrTarget() (nop *instruction, l backend.Label) { //nol
func (m *machine) allocateLabel() *labelPosition {
ectx := m.ectx
l := ectx.AllocateLabel()
- pos := ectx.AllocateLabelPosition(l)
- ectx.LabelPositions[l] = pos
+ pos := ectx.GetOrAllocateLabelPosition(l)
return pos
}
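
Aside: on the consumer side (Format, encodeWithoutSSA, Encode above), the old map lookup with the ok idiom becomes a bounds check plus a nil check against the dense slice. A tiny sketch of that guard, using stand-in types rather than the real backend ones:

package main

import "fmt"

type label int

type labelPosition struct{ binaryOffset int64 }

// lookup returns the position for l, or nil when the label is out of range
// or was never allocated, the two cases the old map collapsed into !ok.
func lookup(positions []*labelPosition, l label) *labelPosition {
	if int(l) >= len(positions) {
		return nil
	}
	return positions[l] // may still be nil for an unallocated label
}

func main() {
	ps := []*labelPosition{nil, {binaryOffset: 8}}
	fmt.Println(lookup(ps, 1).binaryOffset, lookup(ps, 5) == nil) // 8 true
}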
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
index 6615471c6..4eaa13ce1 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
@@ -101,13 +101,14 @@ func (m *machine) LowerParams(args []ssa.Value) {
bits := arg.Type.Bits()
// At this point of compilation, we don't yet know how much space exist below the return address.
// So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation.
- amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
load := m.allocateInstr()
switch arg.Type {
case ssa.TypeI32, ssa.TypeI64:
- load.asULoad(operandNR(reg), amode, bits)
+ load.asULoad(reg, amode, bits)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
- load.asFpuLoad(operandNR(reg), amode, bits)
+ load.asFpuLoad(reg, amode, bits)
default:
panic("BUG")
}
@@ -169,7 +170,8 @@ func (m *machine) LowerReturns(rets []ssa.Value) {
// At this point of compilation, we don't yet know how much space exist below the return address.
// So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation.
- amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
store := m.allocateInstr()
store.asStore(operandNR(reg), amode, bits)
m.insert(store)
@@ -215,9 +217,9 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i
ldr := m.allocateInstr()
switch r.Type {
case ssa.TypeI32, ssa.TypeI64:
- ldr.asULoad(operandNR(reg), amode, r.Type.Bits())
+ ldr.asULoad(reg, amode, r.Type.Bits())
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
- ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits())
+ ldr.asFpuLoad(reg, amode, r.Type.Bits())
default:
panic("BUG")
}
@@ -225,7 +227,7 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i
}
}
-func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) {
+func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) {
exct := m.executableContext
exct.PendingInstructions = exct.PendingInstructions[:0]
mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse)
@@ -235,15 +237,15 @@ func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset
return cur, mode
}
-func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode {
+func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) *addressMode {
if rn.RegType() != regalloc.RegTypeInt {
panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))
}
- var amode addressMode
+ amode := m.amodePool.Allocate()
if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) {
- amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
} else if offsetFitsInAddressModeKindRegSignedImm9(offset) {
- amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
+ *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
} else {
var indexReg regalloc.VReg
if allowTmpRegUse {
@@ -253,7 +255,7 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg
indexReg = m.compiler.AllocateVReg(ssa.TypeI64)
m.lowerConstantI64(indexReg, offset)
}
- amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
+ *amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
}
return amode
}
@@ -315,7 +317,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b
} else {
ao = aluOpSub
}
- alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true)
+ alu.asALU(ao, rd, operandNR(spVReg), imm12Operand, true)
m.insert(alu)
} else {
m.lowerConstantI64(tmpRegVReg, diff)
@@ -326,7 +328,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b
} else {
ao = aluOpSub
}
- alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true)
+ alu.asALU(ao, rd, operandNR(spVReg), operandNR(tmpRegVReg), true)
m.insert(alu)
}
}
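
Aside: the arm64 changes above stop embedding addressMode by value in each instruction and instead hand out pool-allocated pointers, overwriting the whole struct in place. A rough sketch of the allocate-then-assign shape, using a trivial stand-in pool instead of the real wazevoapi.Pool (a real pool keeps returned pointers stable; this one only illustrates the call pattern):

package main

import "fmt"

type addressMode struct {
	kind byte
	imm  int64
}

// amodePool hands out reusable *addressMode values so hot paths avoid a heap
// allocation per instruction; Reset lets the current backing array be reused.
type amodePool struct{ buf []addressMode }

func (p *amodePool) Allocate() *addressMode {
	p.buf = append(p.buf, addressMode{})
	return &p.buf[len(p.buf)-1]
}

func (p *amodePool) Reset() { p.buf = p.buf[:0] }

func main() {
	var p amodePool
	amode := p.Allocate()
	*amode = addressMode{kind: 1, imm: 16} // overwrite the whole struct, as the hunks do
	fmt.Println(amode.kind, amode.imm)     // 1 16
}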
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go
index 7a9cceb33..f8b5d97ac 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go
@@ -59,25 +59,26 @@ func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regallo
} else {
postIndexImm = 8
}
- loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm}
+ loadMode := m.amodePool.Allocate()
+ *loadMode = addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm}
instr := m.allocateInstr()
switch typ {
case ssa.TypeI32:
- instr.asULoad(loadTargetReg, loadMode, 32)
+ instr.asULoad(loadTargetReg.reg(), loadMode, 32)
case ssa.TypeI64:
- instr.asULoad(loadTargetReg, loadMode, 64)
+ instr.asULoad(loadTargetReg.reg(), loadMode, 64)
case ssa.TypeF32:
- instr.asFpuLoad(loadTargetReg, loadMode, 32)
+ instr.asFpuLoad(loadTargetReg.reg(), loadMode, 32)
case ssa.TypeF64:
- instr.asFpuLoad(loadTargetReg, loadMode, 64)
+ instr.asFpuLoad(loadTargetReg.reg(), loadMode, 64)
case ssa.TypeV128:
- instr.asFpuLoad(loadTargetReg, loadMode, 128)
+ instr.asFpuLoad(loadTargetReg.reg(), loadMode, 128)
}
cur = linkInstr(cur, instr)
if isStackArg {
- var storeMode addressMode
+ var storeMode *addressMode
cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true)
toStack := m.allocateInstr()
toStack.asStore(loadTargetReg, storeMode, bits)
@@ -113,21 +114,22 @@ func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr reg
}
if isStackArg {
- var loadMode addressMode
+ var loadMode *addressMode
cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true)
toReg := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
- toReg.asULoad(storeTargetReg, loadMode, bits)
+ toReg.asULoad(storeTargetReg.reg(), loadMode, bits)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
- toReg.asFpuLoad(storeTargetReg, loadMode, bits)
+ toReg.asFpuLoad(storeTargetReg.reg(), loadMode, bits)
default:
panic("TODO?")
}
cur = linkInstr(cur, toReg)
}
- mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm}
instr := m.allocateInstr()
instr.asStore(storeTargetReg, mode, bits)
cur = linkInstr(cur, instr)
@@ -214,11 +216,12 @@ func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction
func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction {
instr := m.allocateInstr()
- mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()}
if store {
instr.asStore(operandNR(d), mode, 64)
} else {
- instr.asULoad(operandNR(d), mode, 64)
+ instr.asULoad(d, mode, 64)
}
return linkInstr(prev, instr)
}
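
Aside: goEntryPreamblePassArg and goEntryPreamblePassResult walk the Go-side parameter and result buffers with post-index address modes: read at the cursor, then bump the cursor by the slot size (8 bytes, or 16 for v128). In plain Go terms the traversal amounts to something like this (illustrative only; each slot is shown as a single uint64):

package main

import "fmt"

// postIndexWalk mimics post-index addressing over a packed argument buffer:
// read the value at the cursor, then advance the cursor by the slot count.
func postIndexWalk(buf []uint64, slotsPerArg []int) []uint64 {
	var out []uint64
	cursor := 0
	for _, slots := range slotsPerArg {
		out = append(out, buf[cursor]) // load at the current "address"
		cursor += slots                // post-increment, 8*slots bytes in the real code
	}
	return out
}

func main() {
	buf := []uint64{1, 2, 0, 3} // an i64, a v128 occupying two slots, another i64
	fmt.Println(postIndexWalk(buf, []int{1, 2, 1})) // [1 2 3]
}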
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go
index 466b1f960..99e6bb482 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go
@@ -87,7 +87,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
// Module context is always the second argument.
moduleCtrPtr := x1VReg
store := m.allocateInstr()
- amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
store.asStore(operandNR(moduleCtrPtr), amode, 64)
cur = linkInstr(cur, store)
}
@@ -120,11 +121,9 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
} else {
sizeInBits = 64
}
- store.asStore(operandNR(v),
- addressMode{
- kind: addressModeKindPostIndex,
- rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8),
- }, sizeInBits)
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8)}
+ store.asStore(operandNR(v), amode, sizeInBits)
cur = linkInstr(cur, store)
}
@@ -139,7 +138,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
frameSizeReg = xzrVReg
sliceSizeReg = xzrVReg
}
- _amode := addressModePreOrPostIndex(spVReg, -16, true)
+ _amode := addressModePreOrPostIndex(m, spVReg, -16, true)
storeP := m.allocateInstr()
storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode)
cur = linkInstr(cur, storeP)
@@ -165,8 +164,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true)
ldr := m.allocateInstr()
// And load the return address.
- ldr.asULoad(operandNR(lrVReg),
- addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
+ amode := addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */)
+ ldr.asULoad(lrVReg, amode, 64)
cur = linkInstr(cur, ldr)
originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want.
@@ -183,23 +182,24 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
r := &abi.Rets[i]
if r.Kind == backend.ABIArgKindReg {
loadIntoReg := m.allocateInstr()
- mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
switch r.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoReg.asULoad(operandNR(r.Reg), mode, 32)
+ loadIntoReg.asULoad(r.Reg, mode, 32)
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoReg.asULoad(operandNR(r.Reg), mode, 64)
+ loadIntoReg.asULoad(r.Reg, mode, 64)
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32)
+ loadIntoReg.asFpuLoad(r.Reg, mode, 32)
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64)
+ loadIntoReg.asFpuLoad(r.Reg, mode, 64)
case ssa.TypeV128:
mode.imm = 16
- loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128)
+ loadIntoReg.asFpuLoad(r.Reg, mode, 128)
default:
panic("TODO")
}
@@ -208,28 +208,29 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
// First we need to load the value to a temporary just like ^^.
intTmp, floatTmp := x11VReg, v11VReg
loadIntoTmpReg := m.allocateInstr()
- mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
var resultReg regalloc.VReg
switch r.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32)
+ loadIntoTmpReg.asULoad(intTmp, mode, 32)
resultReg = intTmp
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64)
+ loadIntoTmpReg.asULoad(intTmp, mode, 64)
resultReg = intTmp
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32)
+ loadIntoTmpReg.asFpuLoad(floatTmp, mode, 32)
resultReg = floatTmp
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64)
+ loadIntoTmpReg.asFpuLoad(floatTmp, mode, 64)
resultReg = floatTmp
case ssa.TypeV128:
mode.imm = 16
- loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128)
+ loadIntoTmpReg.asFpuLoad(floatTmp, mode, 128)
resultReg = floatTmp
default:
panic("TODO")
@@ -258,12 +259,13 @@ func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regal
case regalloc.RegTypeFloat:
sizeInBits = 128
}
- store.asStore(operandNR(v),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- // Execution context is always the first argument.
- rn: x0VReg, imm: offset,
- }, sizeInBits)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ // Execution context is always the first argument.
+ rn: x0VReg, imm: offset,
+ }
+ store.asStore(operandNR(v), mode, sizeInBits)
store.prev = cur
cur.next = store
cur = store
@@ -276,7 +278,7 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
for _, v := range regs {
load := m.allocateInstr()
- var as func(dst operand, amode addressMode, sizeInBits byte)
+ var as func(dst regalloc.VReg, amode *addressMode, sizeInBits byte)
var sizeInBits byte
switch v.RegType() {
case regalloc.RegTypeInt:
@@ -286,12 +288,13 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re
as = load.asFpuLoad
sizeInBits = 128
}
- as(operandNR(v),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- // Execution context is always the first argument.
- rn: x0VReg, imm: offset,
- }, sizeInBits)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ // Execution context is always the first argument.
+ rn: x0VReg, imm: offset,
+ }
+ as(v, mode, sizeInBits)
cur = linkInstr(cur, load)
offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16.
}
@@ -324,11 +327,9 @@ func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode
// Set the exit status on the execution context.
setExistStatus := m.allocateInstr()
- setExistStatus.asStore(operandNR(constReg),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
- }, 32)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64()}
+ setExistStatus.asStore(operandNR(constReg), mode, 32)
cur = linkInstr(cur, setExistStatus)
return cur
}
@@ -340,12 +341,13 @@ func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction {
cur = linkInstr(cur, adr)
storeReturnAddr := m.allocateInstr()
- storeReturnAddr.asStore(operandNR(tmpRegVReg),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- // Execution context is always the first argument.
- rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
- }, 64)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ // Execution context is always the first argument.
+ rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
+ }
+ storeReturnAddr.asStore(operandNR(tmpRegVReg), mode, 64)
cur = linkInstr(cur, storeReturnAddr)
// Exit the execution.
@@ -364,11 +366,12 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe
cur = linkInstr(cur, movSp)
strSp := m.allocateInstr()
- strSp.asStore(operandNR(tmpRegVReg),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
- }, 64)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
+ }
+ strSp.asStore(operandNR(tmpRegVReg), mode, 64)
cur = linkInstr(cur, strSp)
return cur
}
@@ -376,27 +379,28 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe
func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) {
load := m.allocateInstr()
var result regalloc.VReg
- mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
switch arg.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- load.asULoad(operandNR(intVReg), mode, 32)
+ load.asULoad(intVReg, mode, 32)
result = intVReg
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- load.asULoad(operandNR(intVReg), mode, 64)
+ load.asULoad(intVReg, mode, 64)
result = intVReg
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- load.asFpuLoad(operandNR(floatVReg), mode, 32)
+ load.asFpuLoad(floatVReg, mode, 32)
result = floatVReg
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- load.asFpuLoad(operandNR(floatVReg), mode, 64)
+ load.asFpuLoad(floatVReg, mode, 64)
result = floatVReg
case ssa.TypeV128:
mode.imm = 16
- load.asFpuLoad(operandNR(floatVReg), mode, 128)
+ load.asFpuLoad(floatVReg, mode, 128)
result = floatVReg
default:
panic("TODO")
@@ -408,7 +412,8 @@ func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg r
func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction {
store := m.allocateInstr()
- mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
var sizeInBits byte
switch result.Type {
case ssa.TypeI32, ssa.TypeF32:
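
Aside: restoreRegistersInExecutionContext above now declares var as func(dst regalloc.VReg, amode *addressMode, sizeInBits byte) and assigns either load.asULoad or load.asFpuLoad before a single call site, i.e. dispatch through a bound method value. A self-contained sketch of that shape with made-up types:

package main

import "fmt"

type amode struct{ imm int64 }

type instr struct{ desc string }

func (i *instr) asULoad(dst int, a *amode, bits byte) {
	i.desc = fmt.Sprintf("uload x%d [sp+%d] %d-bit", dst, a.imm, bits)
}

func (i *instr) asFpuLoad(dst int, a *amode, bits byte) {
	i.desc = fmt.Sprintf("fpuload v%d [sp+%d] %d-bit", dst, a.imm, bits)
}

// emitLoad picks the constructor once as a bound method value, then calls it
// with the shared arguments, the same shape as the diff's var as func(...).
func emitLoad(isFloat bool, dst int, a *amode) *instr {
	i := &instr{}
	var as func(dst int, a *amode, bits byte)
	sizeInBits := byte(64)
	if isFloat {
		as = i.asFpuLoad
		sizeInBits = 128
	} else {
		as = i.asULoad
	}
	as(dst, a, sizeInBits)
	return i
}

func main() {
	fmt.Println(emitLoad(true, 11, &amode{imm: 32}).desc)  // fpuload v11 [sp+32] 128-bit
	fmt.Println(emitLoad(false, 17, &amode{imm: 16}).desc) // uload x17 [sp+16] 64-bit
}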
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
index 8aabc5997..7121cb538 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
@@ -3,10 +3,12 @@ package arm64
import (
"fmt"
"math"
+ "unsafe"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+ "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type (
@@ -22,9 +24,9 @@ type (
// TODO: optimize the layout later once the impl settles.
instruction struct {
prev, next *instruction
- u1, u2, u3 uint64
- rd, rm, rn, ra operand
- amode addressMode
+ u1, u2 uint64
+ rd regalloc.VReg
+ rm, rn operand
kind instructionKind
addedBeforeRegAlloc bool
}
@@ -174,7 +176,7 @@ func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg {
switch defKinds[i.kind] {
case defKindNone:
case defKindRD:
- *regs = append(*regs, i.rd.nr())
+ *regs = append(*regs, i.rd)
case defKindCall:
_, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2)
for i := byte(0); i < retIntRealRegs; i++ {
@@ -194,7 +196,7 @@ func (i *instruction) AssignDef(reg regalloc.VReg) {
switch defKinds[i.kind] {
case defKindNone:
case defKindRD:
- i.rd = i.rd.assignReg(reg)
+ i.rd = reg
case defKindCall:
panic("BUG: call instructions shouldn't be assigned")
default:
@@ -329,7 +331,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
if rm := i.rm.reg(); rm.Valid() {
*regs = append(*regs, rm)
}
- if ra := i.ra.reg(); ra.Valid() {
+ if ra := regalloc.VReg(i.u2); ra.Valid() {
*regs = append(*regs, ra)
}
case useKindRNRN1RM:
@@ -341,18 +343,20 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
*regs = append(*regs, rm)
}
case useKindAMode:
- if amodeRN := i.amode.rn; amodeRN.Valid() {
+ amode := i.getAmode()
+ if amodeRN := amode.rn; amodeRN.Valid() {
*regs = append(*regs, amodeRN)
}
- if amodeRM := i.amode.rm; amodeRM.Valid() {
+ if amodeRM := amode.rm; amodeRM.Valid() {
*regs = append(*regs, amodeRM)
}
case useKindRNAMode:
*regs = append(*regs, i.rn.reg())
- if amodeRN := i.amode.rn; amodeRN.Valid() {
+ amode := i.getAmode()
+ if amodeRN := amode.rn; amodeRN.Valid() {
*regs = append(*regs, amodeRN)
}
- if amodeRM := i.amode.rm; amodeRM.Valid() {
+ if amodeRM := amode.rm; amodeRM.Valid() {
*regs = append(*regs, amodeRM)
}
case useKindCond:
@@ -374,7 +378,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
case useKindRDRewrite:
*regs = append(*regs, i.rn.reg())
*regs = append(*regs, i.rm.reg())
- *regs = append(*regs, i.rd.reg())
+ *regs = append(*regs, i.rd)
default:
panic(fmt.Sprintf("useKind for %v not defined", i))
}
@@ -408,8 +412,8 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) {
i.rm = i.rm.assignReg(reg)
}
} else {
- if rd := i.rd.reg(); rd.Valid() {
- i.rd = i.rd.assignReg(reg)
+ if rd := i.rd; rd.Valid() {
+ i.rd = reg
}
}
case useKindRNRN1RM:
@@ -435,32 +439,36 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) {
i.rm = i.rm.assignReg(reg)
}
} else {
- if ra := i.ra.reg(); ra.Valid() {
- i.ra = i.ra.assignReg(reg)
+ if ra := regalloc.VReg(i.u2); ra.Valid() {
+ i.u2 = uint64(reg)
}
}
case useKindAMode:
if index == 0 {
- if amodeRN := i.amode.rn; amodeRN.Valid() {
- i.amode.rn = reg
+ amode := i.getAmode()
+ if amodeRN := amode.rn; amodeRN.Valid() {
+ amode.rn = reg
}
} else {
- if amodeRM := i.amode.rm; amodeRM.Valid() {
- i.amode.rm = reg
+ amode := i.getAmode()
+ if amodeRM := amode.rm; amodeRM.Valid() {
+ amode.rm = reg
}
}
case useKindRNAMode:
if index == 0 {
i.rn = i.rn.assignReg(reg)
} else if index == 1 {
- if amodeRN := i.amode.rn; amodeRN.Valid() {
- i.amode.rn = reg
+ amode := i.getAmode()
+ if amodeRN := amode.rn; amodeRN.Valid() {
+ amode.rn = reg
} else {
panic("BUG")
}
} else {
- if amodeRM := i.amode.rm; amodeRM.Valid() {
- i.amode.rm = reg
+ amode := i.getAmode()
+ if amodeRM := amode.rm; amodeRM.Valid() {
+ amode.rm = reg
} else {
panic("BUG")
}
@@ -503,35 +511,35 @@ func (i *instruction) callFuncRef() ssa.FuncRef {
}
// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false)
-func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
+func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) {
i.kind = movZ
- i.rd = operandNR(dst)
+ i.rd = dst
i.u1 = imm
- i.u2 = shift
+ i.u2 = uint64(shift)
if dst64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false)
-func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
+func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) {
i.kind = movK
- i.rd = operandNR(dst)
+ i.rd = dst
i.u1 = imm
- i.u2 = shift
+ i.u2 = uint64(shift)
if dst64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false)
-func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
+func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) {
i.kind = movN
- i.rd = operandNR(dst)
+ i.rd = dst
i.u1 = imm
- i.u2 = shift
+ i.u2 = uint64(shift)
if dst64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
@@ -553,21 +561,21 @@ func (i *instruction) asRet() {
i.kind = ret
}
-func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) {
+func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode *addressMode) {
i.kind = storeP64
i.rn = operandNR(src1)
i.rm = operandNR(src2)
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) {
+func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode *addressMode) {
i.kind = loadP64
i.rn = operandNR(src1)
i.rm = operandNR(src2)
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) {
+func (i *instruction) asStore(src operand, amode *addressMode, sizeInBits byte) {
switch sizeInBits {
case 8:
i.kind = store8
@@ -589,10 +597,10 @@ func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) {
i.kind = fpuStore128
}
i.rn = src
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) {
+func (i *instruction) asSLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) {
switch sizeInBits {
case 8:
i.kind = sLoad8
@@ -604,10 +612,10 @@ func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) {
panic("BUG")
}
i.rd = dst
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) {
+func (i *instruction) asULoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) {
switch sizeInBits {
case 8:
i.kind = uLoad8
@@ -619,10 +627,10 @@ func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) {
i.kind = uLoad64
}
i.rd = dst
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) {
+func (i *instruction) asFpuLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) {
switch sizeInBits {
case 32:
i.kind = fpuLoad32
@@ -632,10 +640,18 @@ func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte)
i.kind = fpuLoad128
}
i.rd = dst
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) {
+func (i *instruction) getAmode() *addressMode {
+ return wazevoapi.PtrFromUintptr[addressMode](uintptr(i.u1))
+}
+
+func (i *instruction) setAmode(a *addressMode) {
+ i.u1 = uint64(uintptr(unsafe.Pointer(a)))
+}
+
+func (i *instruction) asVecLoad1R(rd regalloc.VReg, rn operand, arr vecArrangement) {
// NOTE: currently only has support for no-offset loads, though it is suspicious that
// we would need to support offset load (that is only available for post-index).
i.kind = vecLoad1R
@@ -646,32 +662,32 @@ func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) {
func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) {
i.kind = cSet
- i.rd = operandNR(rd)
+ i.rd = rd
i.u1 = uint64(c)
if mask {
i.u2 = 1
}
}
-func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) {
+func (i *instruction) asCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) {
i.kind = cSel
i.rd = rd
i.rn = rn
i.rm = rm
i.u1 = uint64(c)
if _64bit {
- i.u3 = 1
+ i.u2 = 1
}
}
-func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) {
+func (i *instruction) asFpuCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) {
i.kind = fpuCSel
i.rd = rd
i.rn = rn
i.rm = rm
i.u1 = uint64(c)
if _64bit {
- i.u3 = 1
+ i.u2 = 1
}
}
@@ -691,7 +707,7 @@ func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targetIndex, tar
}
func (i *instruction) brTableSequenceOffsetsResolved() {
- i.u3 = 1 // indicate that the offsets are resolved, for debugging.
+ i.rm.data = 1 // indicate that the offsets are resolved, for debugging.
}
func (i *instruction) brLabel() label {
@@ -701,7 +717,7 @@ func (i *instruction) brLabel() label {
// brOffsetResolved is called when the target label is resolved.
func (i *instruction) brOffsetResolve(offset int64) {
i.u2 = uint64(offset)
- i.u3 = 1 // indicate that the offset is resolved, for debugging.
+ i.rm.data = 1 // indicate that the offset is resolved, for debugging.
}
func (i *instruction) brOffset() int64 {
@@ -714,7 +730,7 @@ func (i *instruction) asCondBr(c cond, target label, is64bit bool) {
i.u1 = c.asUint64()
i.u2 = uint64(target)
if is64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
@@ -728,17 +744,17 @@ func (i *instruction) condBrLabel() label {
// condBrOffsetResolve is called when the target label is resolved.
func (i *instruction) condBrOffsetResolve(offset int64) {
- i.rd.data = uint64(offset)
- i.rd.data2 = 1 // indicate that the offset is resolved, for debugging.
+ i.rn.data = uint64(offset)
+ i.rn.data2 = 1 // indicate that the offset is resolved, for debugging.
}
// condBrOffsetResolved returns true if condBrOffsetResolve is already called.
func (i *instruction) condBrOffsetResolved() bool {
- return i.rd.data2 == 1
+ return i.rn.data2 == 1
}
func (i *instruction) condBrOffset() int64 {
- return int64(i.rd.data)
+ return int64(i.rn.data)
}
func (i *instruction) condBrCond() cond {
@@ -746,33 +762,33 @@ func (i *instruction) condBrCond() cond {
}
func (i *instruction) condBr64bit() bool {
- return i.u3 == 1
+ return i.u2&(1<<32) != 0
}
func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) {
i.kind = loadFpuConst32
i.u1 = raw
- i.rd = operandNR(rd)
+ i.rd = rd
}
func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) {
i.kind = loadFpuConst64
i.u1 = raw
- i.rd = operandNR(rd)
+ i.rd = rd
}
func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) {
i.kind = loadFpuConst128
i.u1 = lo
i.u2 = hi
- i.rd = operandNR(rd)
+ i.rd = rd
}
func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) {
i.kind = fpuCmp
i.rn, i.rm = rn, rm
if is64bit {
- i.u3 = 1
+ i.u1 = 1
}
}
@@ -783,12 +799,12 @@ func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, i
i.u1 = uint64(c)
i.u2 = uint64(flag)
if is64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
// asALU setups a basic ALU instruction.
-func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
+func (i *instruction) asALU(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) {
switch rm.kind {
case operandKindNR:
i.kind = aluRRR
@@ -804,22 +820,22 @@ func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
i.u1 = uint64(aluOp)
i.rd, i.rn, i.rm = rd, rn, rm
if dst64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
// asALU setups a basic ALU instruction.
-func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) {
+func (i *instruction) asALURRRR(aluOp aluOp, rd regalloc.VReg, rn, rm operand, ra regalloc.VReg, dst64bit bool) {
i.kind = aluRRRR
i.u1 = uint64(aluOp)
- i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra
+ i.rd, i.rn, i.rm, i.u2 = rd, rn, rm, uint64(ra)
if dst64bit {
- i.u3 = 1
+ i.u1 |= 1 << 32
}
}
// asALUShift setups a shift based ALU instruction.
-func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
+func (i *instruction) asALUShift(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) {
switch rm.kind {
case operandKindNR:
i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands.
@@ -831,17 +847,17 @@ func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool)
i.u1 = uint64(aluOp)
i.rd, i.rn, i.rm = rd, rn, rm
if dst64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) {
i.kind = aluRRBitmaskImm
i.u1 = uint64(aluOp)
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
i.u2 = imm
if dst64bit {
- i.u3 = 1
+ i.u1 |= 1 << 32
}
}
@@ -852,76 +868,76 @@ func (i *instruction) asMovToFPSR(rn regalloc.VReg) {
func (i *instruction) asMovFromFPSR(rd regalloc.VReg) {
i.kind = movFromFPSR
- i.rd = operandNR(rd)
+ i.rd = rd
}
func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) {
i.kind = bitRR
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
i.u1 = uint64(bitOp)
if is64bit {
i.u2 = 1
}
}
-func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) {
+func (i *instruction) asFpuRRR(op fpuBinOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) {
i.kind = fpuRRR
i.u1 = uint64(op)
i.rd, i.rn, i.rm = rd, rn, rm
if dst64bit {
- i.u3 = 1
+ i.u2 = 1
}
}
-func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) {
+func (i *instruction) asFpuRR(op fpuUniOp, rd regalloc.VReg, rn operand, dst64bit bool) {
i.kind = fpuRR
i.u1 = uint64(op)
i.rd, i.rn = rd, rn
if dst64bit {
- i.u3 = 1
+ i.u2 = 1
}
}
func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) {
i.kind = extend
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
i.u1 = uint64(fromBits)
i.u2 = uint64(toBits)
if signed {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
func (i *instruction) asMove32(rd, rn regalloc.VReg) {
i.kind = mov32
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
}
func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction {
i.kind = mov64
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
return i
}
func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) {
i.kind = fpuMov64
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
}
func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction {
i.kind = fpuMov128
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
return i
}
-func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) {
+func (i *instruction) asMovToVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) {
i.kind = movToVec
i.rd = rd
i.rn = rn
i.u1, i.u2 = uint64(arr), uint64(index)
}
-func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) {
+func (i *instruction) asMovFromVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex, signed bool) {
if signed {
i.kind = movFromVecSigned
} else {
@@ -932,48 +948,48 @@ func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vec
i.u1, i.u2 = uint64(arr), uint64(index)
}
-func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) {
+func (i *instruction) asVecDup(rd regalloc.VReg, rn operand, arr vecArrangement) {
i.kind = vecDup
i.u1 = uint64(arr)
i.rn, i.rd = rn, rd
}
-func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) {
+func (i *instruction) asVecDupElement(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) {
i.kind = vecDupElement
i.u1 = uint64(arr)
i.rn, i.rd = rn, rd
i.u2 = uint64(index)
}
-func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) {
+func (i *instruction) asVecExtract(rd regalloc.VReg, rn, rm operand, arr vecArrangement, index uint32) {
i.kind = vecExtract
i.u1 = uint64(arr)
i.rn, i.rm, i.rd = rn, rm, rd
i.u2 = uint64(index)
}
-func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) {
+func (i *instruction) asVecMovElement(rd regalloc.VReg, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) {
i.kind = vecMovElement
i.u1 = uint64(arr)
- i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex)
+ i.u2 = uint64(rdIndex) | uint64(rnIndex)<<32
i.rn, i.rd = rn, rd
}
-func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) {
+func (i *instruction) asVecMisc(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) {
i.kind = vecMisc
i.u1 = uint64(op)
i.rn, i.rd = rn, rd
i.u2 = uint64(arr)
}
-func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) {
+func (i *instruction) asVecLanes(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) {
i.kind = vecLanes
i.u1 = uint64(op)
i.rn, i.rd = rn, rd
i.u2 = uint64(arr)
}
-func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction {
+func (i *instruction) asVecShiftImm(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction {
i.kind = vecShiftImm
i.u1 = uint64(op)
i.rn, i.rm, i.rd = rn, rm, rd
@@ -981,7 +997,7 @@ func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrange
return i
}
-func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) {
+func (i *instruction) asVecTbl(nregs byte, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
switch nregs {
case 0, 1:
i.kind = vecTbl
@@ -1000,14 +1016,14 @@ func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangemen
i.u2 = uint64(arr)
}
-func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) {
+func (i *instruction) asVecPermute(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
i.kind = vecPermute
i.u1 = uint64(op)
i.rn, i.rm, i.rd = rn, rm, rd
i.u2 = uint64(arr)
}
-func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction {
+func (i *instruction) asVecRRR(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction {
i.kind = vecRRR
i.u1 = uint64(op)
i.rn, i.rd, i.rm = rn, rd, rm
@@ -1017,7 +1033,7 @@ func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement)
// asVecRRRRewrite encodes a vector instruction that rewrites the destination register.
// IMPORTANT: the destination register must be already defined before this instruction.
-func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) {
+func (i *instruction) asVecRRRRewrite(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
i.kind = vecRRRRewrite
i.u1 = uint64(op)
i.rn, i.rd, i.rm = rn, rd, rm
@@ -1033,8 +1049,8 @@ func (i *instruction) IsCopy() bool {
// String implements fmt.Stringer.
func (i *instruction) String() (str string) {
- is64SizeBitToSize := func(u3 uint64) byte {
- if u3 == 0 {
+ is64SizeBitToSize := func(v uint64) byte {
+ if v == 0 {
return 32
}
return 64
@@ -1049,46 +1065,46 @@ func (i *instruction) String() (str string) {
str = "nop0"
}
case aluRRR:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size),
+ formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size),
i.rm.format(size))
case aluRRRR:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u1 >> 32)
str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size))
+ formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(regalloc.VReg(i.u2), size))
case aluRRImm12:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size))
+ formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size))
case aluRRBitmaskImm:
- size := is64SizeBitToSize(i.u3)
- rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size)
+ size := is64SizeBitToSize(i.u1 >> 32)
+ rd, rn := formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size)
if size == 32 {
str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2))
} else {
str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2)
}
case aluRRImmShift:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("%s %s, %s, %#x",
aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
i.rm.shiftImm(),
)
case aluRRRShift:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("%s %s, %s, %s",
aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
i.rm.format(size),
)
case aluRRRExtend:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
// Regardless of the source size, the register is formatted in 32-bit.
i.rm.format(32),
@@ -1097,57 +1113,57 @@ func (i *instruction) String() (str string) {
size := is64SizeBitToSize(i.u2)
str = fmt.Sprintf("%s %s, %s",
bitOp(i.u1),
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
)
case uLoad8:
- str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case sLoad8:
- str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case uLoad16:
- str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case sLoad16:
- str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case uLoad32:
- str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case sLoad32:
- str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case uLoad64:
- str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
+ str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64))
case store8:
- str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8))
+ str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(8))
case store16:
- str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16))
+ str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(16))
case store32:
- str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(32))
case store64:
- str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64))
+ str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64))
case storeP64:
str = fmt.Sprintf("stp %s, %s, %s",
- formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
+ formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64))
case loadP64:
str = fmt.Sprintf("ldp %s, %s, %s",
- formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
+ formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64))
case mov64:
str = fmt.Sprintf("mov %s, %s",
- formatVRegSized(i.rd.nr(), 64),
+ formatVRegSized(i.rd, 64),
formatVRegSized(i.rn.nr(), 64))
case mov32:
- str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32))
+ str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd, 32), formatVRegSized(i.rn.nr(), 32))
case movZ:
- size := is64SizeBitToSize(i.u3)
- str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
+ size := is64SizeBitToSize(i.u2 >> 32)
+ str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)
case movN:
- size := is64SizeBitToSize(i.u3)
- str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
+ size := is64SizeBitToSize(i.u2 >> 32)
+ str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)
case movK:
- size := is64SizeBitToSize(i.u3)
- str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
+ size := is64SizeBitToSize(i.u2 >> 32)
+ str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)
case extend:
fromBits, toBits := byte(i.u1), byte(i.u2)
var signedStr string
- if i.u3 == 1 {
+ if i.u2>>32 == 1 {
signedStr = "s"
} else {
signedStr = "u"
@@ -1161,39 +1177,39 @@ func (i *instruction) String() (str string) {
case 32:
fromStr = "w"
}
- str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32))
+ str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd, toBits), formatVRegSized(i.rn.nr(), 32))
case cSel:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2)
str = fmt.Sprintf("csel %s, %s, %s, %s",
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
formatVRegSized(i.rm.nr(), size),
condFlag(i.u1),
)
case cSet:
if i.u2 != 0 {
- str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1))
+ str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1))
} else {
- str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1))
+ str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1))
}
case cCmpImm:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s",
formatVRegSized(i.rn.nr(), size), i.rm.data,
i.u2&0b1111,
condFlag(i.u1))
case fpuMov64:
str = fmt.Sprintf("mov %s, %s",
- formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement8B, vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone))
case fpuMov128:
str = fmt.Sprintf("mov %s, %s",
- formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement16B, vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone))
case fpuMovFromVec:
panic("TODO")
case fpuRR:
- dstSz := is64SizeBitToSize(i.u3)
+ dstSz := is64SizeBitToSize(i.u2)
srcSz := dstSz
op := fpuUniOp(i.u1)
switch op {
@@ -1203,38 +1219,38 @@ func (i *instruction) String() (str string) {
srcSz = 64
}
str = fmt.Sprintf("%s %s, %s", op.String(),
- formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz))
+ formatVRegSized(i.rd, dstSz), formatVRegSized(i.rn.nr(), srcSz))
case fpuRRR:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2)
str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
+ formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
case fpuRRI:
panic("TODO")
case fpuRRRR:
panic("TODO")
case fpuCmp:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u1)
str = fmt.Sprintf("fcmp %s, %s",
formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
case fpuLoad32:
- str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case fpuStore32:
- str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64))
+ str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(64))
case fpuLoad64:
- str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
+ str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64))
case fpuStore64:
- str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64))
+ str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64))
case fpuLoad128:
- str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64))
+ str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 128), i.getAmode().format(64))
case fpuStore128:
- str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64))
+ str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.getAmode().format(64))
case loadFpuConst32:
- str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1)))
+ str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd, 32), math.Float32frombits(uint32(i.u1)))
case loadFpuConst64:
- str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1))
+ str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd, 64), math.Float64frombits(i.u1))
case loadFpuConst128:
str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x",
- formatVRegSized(i.rd.nr(), 128), i.u1, i.u2)
+ formatVRegSized(i.rd, 128), i.u1, i.u2)
case fpuToInt:
var op, src, dst string
if signed := i.u1 == 1; signed {
@@ -1242,15 +1258,15 @@ func (i *instruction) String() (str string) {
} else {
op = "fcvtzu"
}
- if src64 := i.u2 == 1; src64 {
+ if src64 := i.u2&1 != 0; src64 {
src = formatVRegWidthVec(i.rn.nr(), vecArrangementD)
} else {
src = formatVRegWidthVec(i.rn.nr(), vecArrangementS)
}
- if dst64 := i.u3 == 1; dst64 {
- dst = formatVRegSized(i.rd.nr(), 64)
+ if dst64 := i.u2&2 != 0; dst64 {
+ dst = formatVRegSized(i.rd, 64)
} else {
- dst = formatVRegSized(i.rd.nr(), 32)
+ dst = formatVRegSized(i.rd, 32)
}
str = fmt.Sprintf("%s %s, %s", op, dst, src)
@@ -1261,21 +1277,21 @@ func (i *instruction) String() (str string) {
} else {
op = "ucvtf"
}
- if src64 := i.u2 == 1; src64 {
+ if src64 := i.u2&1 != 0; src64 {
src = formatVRegSized(i.rn.nr(), 64)
} else {
src = formatVRegSized(i.rn.nr(), 32)
}
- if dst64 := i.u3 == 1; dst64 {
- dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD)
+ if dst64 := i.u2&2 != 0; dst64 {
+ dst = formatVRegWidthVec(i.rd, vecArrangementD)
} else {
- dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS)
+ dst = formatVRegWidthVec(i.rd, vecArrangementS)
}
str = fmt.Sprintf("%s %s, %s", op, dst, src)
case fpuCSel:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2)
str = fmt.Sprintf("fcsel %s, %s, %s, %s",
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
formatVRegSized(i.rm.nr(), size),
condFlag(i.u1),
@@ -1291,7 +1307,7 @@ func (i *instruction) String() (str string) {
default:
panic("unsupported arrangement " + arr.String())
}
- str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size))
+ str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd, arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size))
case movFromVec, movFromVecSigned:
var size byte
var opcode string
@@ -1315,23 +1331,23 @@ func (i *instruction) String() (str string) {
default:
panic("unsupported arrangement " + arr.String())
}
- str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))
+ str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd, size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))
case vecDup:
str = fmt.Sprintf("dup %s, %s",
- formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone),
formatVRegSized(i.rn.nr(), 64),
)
case vecDupElement:
arr := vecArrangement(i.u1)
str = fmt.Sprintf("dup %s, %s",
- formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+ formatVRegVec(i.rd, arr, vecIndexNone),
formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)),
)
case vecDupFromFpu:
panic("TODO")
case vecExtract:
str = fmt.Sprintf("ext %s, %s, %s, #%d",
- formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone),
formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone),
uint32(i.u2),
@@ -1340,15 +1356,15 @@ func (i *instruction) String() (str string) {
panic("TODO")
case vecMovElement:
str = fmt.Sprintf("mov %s, %s",
- formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)),
- formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)),
+ formatVRegVec(i.rd, vecArrangement(i.u1), vecIndex(i.u2&0xffffffff)),
+ formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u2>>32)),
)
case vecMiscNarrow:
panic("TODO")
case vecRRR, vecRRRRewrite:
str = fmt.Sprintf("%s %s, %s, %s",
vecOp(i.u1),
- formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone),
formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone),
)
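The hunks above drop the old u3 field and pack two 32-bit values into the single 64-bit u2 instead: vecMovElement keeps both lane indexes there, and condBr (further down) keeps the target label in the low half and the size bit in the high half. A minimal sketch of that packing scheme; pack32x2/unpack32x2 are illustrative names, not wazero functions:

package main

import "fmt"

// pack32x2 stores lo in the low 32 bits and hi in the high 32 bits of one uint64,
// mirroring how this refactor folds e.g. vecMovElement's two lane indexes into i.u2.
func pack32x2(lo, hi uint32) uint64 { return uint64(lo) | uint64(hi)<<32 }

// unpack32x2 is the inverse, matching the i.u2&0xffffffff and i.u2>>32 reads above.
func unpack32x2(u uint64) (lo, hi uint32) { return uint32(u & 0xffffffff), uint32(u >> 32) }

func main() {
	u := pack32x2(3, 1) // e.g. destination index 3, source index 1
	lo, hi := unpack32x2(u)
	fmt.Println(lo, hi) // 3 1
}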
@@ -1356,12 +1372,12 @@ func (i *instruction) String() (str string) {
vop := vecOp(i.u1)
if vop == vecOpCmeq0 {
str = fmt.Sprintf("cmeq %s, %s, #0",
- formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
} else {
str = fmt.Sprintf("%s %s, %s",
vop,
- formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
}
case vecLanes:
@@ -1379,24 +1395,24 @@ func (i *instruction) String() (str string) {
}
str = fmt.Sprintf("%s %s, %s",
vecOp(i.u1),
- formatVRegWidthVec(i.rd.nr(), destArr),
+ formatVRegWidthVec(i.rd, destArr),
formatVRegVec(i.rn.nr(), arr, vecIndexNone))
case vecShiftImm:
arr := vecArrangement(i.u2)
str = fmt.Sprintf("%s %s, %s, #%d",
vecOp(i.u1),
- formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+ formatVRegVec(i.rd, arr, vecIndexNone),
formatVRegVec(i.rn.nr(), arr, vecIndexNone),
i.rm.shiftImm())
case vecTbl:
arr := vecArrangement(i.u2)
str = fmt.Sprintf("tbl %s, { %s }, %s",
- formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+ formatVRegVec(i.rd, arr, vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone),
formatVRegVec(i.rm.nr(), arr, vecIndexNone))
case vecTbl2:
arr := vecArrangement(i.u2)
- rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr()
+ rd, rn, rm := i.rd, i.rn.nr(), i.rm.nr()
rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType())
str = fmt.Sprintf("tbl %s, { %s, %s }, %s",
formatVRegVec(rd, arr, vecIndexNone),
@@ -1407,13 +1423,13 @@ func (i *instruction) String() (str string) {
arr := vecArrangement(i.u2)
str = fmt.Sprintf("%s %s, %s, %s",
vecOp(i.u1),
- formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+ formatVRegVec(i.rd, arr, vecIndexNone),
formatVRegVec(i.rn.nr(), arr, vecIndexNone),
formatVRegVec(i.rm.nr(), arr, vecIndexNone))
case movToFPSR:
str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64))
case movFromFPSR:
- str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64))
+ str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd, 64))
case call:
str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1))
case callInd:
@@ -1422,15 +1438,15 @@ func (i *instruction) String() (str string) {
str = "ret"
case br:
target := label(i.u1)
- if i.u3 != 0 {
+ if i.rm.data != 0 {
str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String())
} else {
str = fmt.Sprintf("b %s", target.String())
}
case condBr:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
c := cond(i.u1)
- target := label(i.u2)
+ target := label(i.u2 & 0xffffffff)
switch c.kind() {
case condKindRegisterZero:
if !i.condBrOffsetResolved() {
@@ -1456,7 +1472,7 @@ func (i *instruction) String() (str string) {
}
}
case adr:
- str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1))
+ str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd, 64), int64(i.u1))
case brTableSequence:
targetIndex := i.u1
str = fmt.Sprintf("br_table_sequence %s, table_index=%d", formatVRegSized(i.rn.nr(), 64), targetIndex)
@@ -1473,7 +1489,7 @@ func (i *instruction) String() (str string) {
case 1:
m = m + "b"
}
- str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64))
+ str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64))
case atomicCas:
m := "casal"
size := byte(32)
@@ -1485,7 +1501,7 @@ func (i *instruction) String() (str string) {
case 1:
m = m + "b"
}
- str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))
+ str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))
case atomicLoad:
m := "ldar"
size := byte(32)
@@ -1497,7 +1513,7 @@ func (i *instruction) String() (str string) {
case 1:
m = m + "b"
}
- str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64))
+ str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64))
case atomicStore:
m := "stlr"
size := byte(32)
@@ -1517,9 +1533,9 @@ func (i *instruction) String() (str string) {
case emitSourceOffsetInfo:
str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1))
case vecLoad1R:
- str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64))
+ str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64))
case loadConstBlockArg:
- str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd.nr(), 64), i.u1)
+ str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd, 64), i.u1)
default:
panic(i.kind)
}
@@ -1528,26 +1544,26 @@ func (i *instruction) String() (str string) {
func (i *instruction) asAdr(rd regalloc.VReg, offset int64) {
i.kind = adr
- i.rd = operandNR(rd)
+ i.rd = rd
i.u1 = uint64(offset)
}
-func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt operand, size uint64) {
+func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt regalloc.VReg, size uint64) {
i.kind = atomicRmw
- i.rd, i.rn, i.rm = rt, rn, rs
+ i.rd, i.rn, i.rm = rt, operandNR(rn), operandNR(rs)
i.u1 = uint64(op)
i.u2 = size
}
-func (i *instruction) asAtomicCas(rn, rs, rt operand, size uint64) {
+func (i *instruction) asAtomicCas(rn, rs, rt regalloc.VReg, size uint64) {
i.kind = atomicCas
- i.rm, i.rn, i.rd = rt, rn, rs
+ i.rm, i.rn, i.rd = operandNR(rt), operandNR(rn), rs
i.u2 = size
}
-func (i *instruction) asAtomicLoad(rn, rt operand, size uint64) {
+func (i *instruction) asAtomicLoad(rn, rt regalloc.VReg, size uint64) {
i.kind = atomicLoad
- i.rn, i.rd = rn, rt
+ i.rn, i.rd = operandNR(rn), rt
i.u2 = size
}
@@ -1755,12 +1771,12 @@ func (i *instruction) asLoadConstBlockArg(v uint64, typ ssa.Type, dst regalloc.V
i.kind = loadConstBlockArg
i.u1 = v
i.u2 = uint64(typ)
- i.rd = operandNR(dst)
+ i.rd = dst
return i
}
func (i *instruction) loadConstBlockArgData() (v uint64, typ ssa.Type, dst regalloc.VReg) {
- return i.u1, ssa.Type(i.u2), i.rd.nr()
+ return i.u1, ssa.Type(i.u2), i.rd
}
func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction {
@@ -1778,7 +1794,7 @@ func (i *instruction) asUDF() *instruction {
return i
}
-func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) {
+func (i *instruction) asFpuToInt(rd regalloc.VReg, rn operand, rdSigned, src64bit, dst64bit bool) {
i.kind = fpuToInt
i.rn = rn
i.rd = rd
@@ -1789,11 +1805,11 @@ func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bo
i.u2 = 1
}
if dst64bit {
- i.u3 = 1
+ i.u2 |= 2
}
}
-func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) {
+func (i *instruction) asIntToFpu(rd regalloc.VReg, rn operand, rnSigned, src64bit, dst64bit bool) {
i.kind = intToFpu
i.rn = rn
i.rd = rd
@@ -1804,7 +1820,7 @@ func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bo
i.u2 = 1
}
if dst64bit {
- i.u3 = 1
+ i.u2 |= 2
}
}
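asFpuToInt and asIntToFpu now keep both size flags in u2: bit 0 means the source is 64-bit and bit 1 means the destination is 64-bit, which is exactly what the String and encodeCnvBetweenFloatInt hunks read back as u2&1 and u2&2. A standalone sketch of that flag layout; packCvtFlags is an illustrative helper, not part of the package:

package main

import "fmt"

// packCvtFlags mirrors the u2 layout used by asFpuToInt/asIntToFpu in this diff:
// bit 0 = 64-bit source, bit 1 = 64-bit destination.
func packCvtFlags(src64, dst64 bool) (u2 uint64) {
	if src64 {
		u2 = 1
	}
	if dst64 {
		u2 |= 2
	}
	return
}

func main() {
	u2 := packCvtFlags(true, false)
	fmt.Println(u2&1 != 0, u2&2 != 0) // true false, the same test done when encoding
}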
@@ -1817,7 +1833,7 @@ func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction {
// aluOp determines the type of ALU operation. Instructions whose kind is one of
// aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend
// would use this type.
-type aluOp int
+type aluOp uint32
func (a aluOp) String() string {
switch a {
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
index 227a96474..f0ede2d6a 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
@@ -44,12 +44,12 @@ func (i *instruction) encode(m *machine) {
case callInd:
c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true))
case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128:
- c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode))
+ c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], *i.getAmode()))
case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128:
- c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode))
+ c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.RealReg()], *i.getAmode()))
case vecLoad1R:
c.Emit4Bytes(encodeVecLoad1R(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(i.u1)))
case condBr:
@@ -75,22 +75,22 @@ func (i *instruction) encode(m *machine) {
panic("BUG")
}
case movN:
- c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+ c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
case movZ:
- c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+ c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
case movK:
- c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+ c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
case mov32:
- to, from := i.rd.realReg(), i.rn.realReg()
+ to, from := i.rd.RealReg(), i.rn.realReg()
c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to]))
case mov64:
- to, from := i.rd.realReg(), i.rn.realReg()
+ to, from := i.rd.RealReg(), i.rn.realReg()
toIsSp := to == sp
fromIsSp := from == sp
c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp))
case loadP64, storeP64:
rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
- amode := i.amode
+ amode := i.getAmode()
rn := regNumberInEncoding[amode.rn.RealReg()]
var pre bool
switch amode.kind {
@@ -102,21 +102,21 @@ func (i *instruction) encode(m *machine) {
}
c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm))
case loadFpuConst32:
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
if i.u1 == 0 {
c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B))
} else {
encodeLoadFpuConst32(c, rd, i.u1)
}
case loadFpuConst64:
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
if i.u1 == 0 {
c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B))
} else {
- encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.realReg()], i.u1)
+ encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.RealReg()], i.u1)
}
case loadFpuConst128:
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
lo, hi := i.u1, i.u2
if lo == 0 && hi == 0 {
c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B))
@@ -126,35 +126,35 @@ func (i *instruction) encode(m *machine) {
case aluRRRR:
c.Emit4Bytes(encodeAluRRRR(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
- regNumberInEncoding[i.ra.realReg()],
- uint32(i.u3),
+ regNumberInEncoding[regalloc.VReg(i.u2).RealReg()],
+ uint32(i.u1>>32),
))
case aluRRImmShift:
c.Emit4Bytes(encodeAluRRImm(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
uint32(i.rm.shiftImm()),
- uint32(i.u3),
+ uint32(i.u2>>32),
))
case aluRRR:
rn := i.rn.realReg()
c.Emit4Bytes(encodeAluRRR(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[rn],
regNumberInEncoding[i.rm.realReg()],
- i.u3 == 1,
+ i.u2>>32 == 1,
rn == sp,
))
case aluRRRExtend:
rm, exo, to := i.rm.er()
c.Emit4Bytes(encodeAluRRRExtend(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[rm.RealReg()],
exo,
@@ -164,25 +164,25 @@ func (i *instruction) encode(m *machine) {
r, amt, sop := i.rm.sr()
c.Emit4Bytes(encodeAluRRRShift(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[r.RealReg()],
uint32(amt),
sop,
- i.u3 == 1,
+ i.u2>>32 == 1,
))
case aluRRBitmaskImm:
c.Emit4Bytes(encodeAluBitmaskImmediate(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
i.u2,
- i.u3 == 1,
+ i.u1>>32 == 1,
))
case bitRR:
c.Emit4Bytes(encodeBitRR(
bitOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
uint32(i.u2)),
)
@@ -190,22 +190,22 @@ func (i *instruction) encode(m *machine) {
imm12, shift := i.rm.imm12()
c.Emit4Bytes(encodeAluRRImm12(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
imm12, shift,
- i.u3 == 1,
+ i.u2>>32 == 1,
))
case fpuRRR:
c.Emit4Bytes(encodeFpuRRR(
fpuBinOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
- i.u3 == 1,
+ i.u2 == 1,
))
case fpuMov64, fpuMov128:
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register--
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
rn := regNumberInEncoding[i.rn.realReg()]
var q uint32
if kind == fpuMov128 {
@@ -213,7 +213,7 @@ func (i *instruction) encode(m *machine) {
}
c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd)
case cSet:
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
cf := condFlag(i.u1)
if i.u2 == 1 {
// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV-
@@ -225,12 +225,12 @@ func (i *instruction) encode(m *machine) {
c.Emit4Bytes(0b1001101010011111<<16 | uint32(cf.invert())<<12 | 0b111111<<5 | rd)
}
case extend:
- c.Emit4Bytes(encodeExtend(i.u3 == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.realReg()], regNumberInEncoding[i.rn.realReg()]))
+ c.Emit4Bytes(encodeExtend((i.u2>>32) == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()]))
case fpuCmp:
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en
rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
var ftype uint32
- if i.u3 == 1 {
+ if i.u1 == 1 {
ftype = 0b01 // double precision.
}
c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5)
@@ -242,34 +242,34 @@ func (i *instruction) encode(m *machine) {
c.Emit4Bytes(0)
}
case adr:
- c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1)))
+ c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.RealReg()], uint32(i.u1)))
case cSel:
c.Emit4Bytes(encodeConditionalSelect(
kind,
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
condFlag(i.u1),
- i.u3 == 1,
+ i.u2 == 1,
))
case fpuCSel:
c.Emit4Bytes(encodeFpuCSel(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
condFlag(i.u1),
- i.u3 == 1,
+ i.u2 == 1,
))
case movToVec:
c.Emit4Bytes(encodeMoveToVec(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(byte(i.u1)),
vecIndex(i.u2),
))
case movFromVec, movFromVecSigned:
c.Emit4Bytes(encodeMoveFromVec(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(byte(i.u1)),
vecIndex(i.u2),
@@ -277,18 +277,18 @@ func (i *instruction) encode(m *machine) {
))
case vecDup:
c.Emit4Bytes(encodeVecDup(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(byte(i.u1))))
case vecDupElement:
c.Emit4Bytes(encodeVecDupElement(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(byte(i.u1)),
vecIndex(i.u2)))
case vecExtract:
c.Emit4Bytes(encodeVecExtract(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
vecArrangement(byte(i.u1)),
@@ -296,35 +296,35 @@ func (i *instruction) encode(m *machine) {
case vecPermute:
c.Emit4Bytes(encodeVecPermute(
vecOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
vecArrangement(byte(i.u2))))
case vecMovElement:
c.Emit4Bytes(encodeVecMovElement(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(i.u1),
- uint32(i.u2), uint32(i.u3),
+ uint32(i.u2), uint32(i.u2>>32),
))
case vecMisc:
c.Emit4Bytes(encodeAdvancedSIMDTwoMisc(
vecOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(i.u2),
))
case vecLanes:
c.Emit4Bytes(encodeVecLanes(
vecOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(i.u2),
))
case vecShiftImm:
c.Emit4Bytes(encodeVecShiftImm(
vecOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
uint32(i.rm.shiftImm()),
vecArrangement(i.u2),
@@ -332,7 +332,7 @@ func (i *instruction) encode(m *machine) {
case vecTbl:
c.Emit4Bytes(encodeVecTbl(
1,
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
vecArrangement(i.u2)),
@@ -340,7 +340,7 @@ func (i *instruction) encode(m *machine) {
case vecTbl2:
c.Emit4Bytes(encodeVecTbl(
2,
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
vecArrangement(i.u2)),
@@ -353,9 +353,9 @@ func (i *instruction) encode(m *machine) {
case fpuRR:
c.Emit4Bytes(encodeFloatDataOneSource(
fpuUniOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
- i.u3 == 1,
+ i.u2 == 1,
))
case vecRRR:
if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal {
@@ -365,14 +365,14 @@ func (i *instruction) encode(m *machine) {
case vecRRRRewrite:
c.Emit4Bytes(encodeVecRRR(
vecOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
vecArrangement(i.u2),
))
case cCmpImm:
// Conditional compare (immediate) in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
- sf := uint32(i.u3 & 0b1)
+ sf := uint32((i.u2 >> 32) & 0b1)
nzcv := uint32(i.u2 & 0b1111)
cond := uint32(condFlag(i.u1))
imm := uint32(i.rm.data & 0b11111)
@@ -381,7 +381,7 @@ func (i *instruction) encode(m *machine) {
sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv,
)
case movFromFPSR:
- rt := regNumberInEncoding[i.rd.realReg()]
+ rt := regNumberInEncoding[i.rd.RealReg()]
c.Emit4Bytes(encodeSystemRegisterMove(rt, true))
case movToFPSR:
rt := regNumberInEncoding[i.rn.realReg()]
@@ -390,13 +390,13 @@ func (i *instruction) encode(m *machine) {
c.Emit4Bytes(encodeAtomicRmw(
atomicRmwOp(i.u1),
regNumberInEncoding[i.rm.realReg()],
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
uint32(i.u2),
))
case atomicCas:
c.Emit4Bytes(encodeAtomicCas(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rm.realReg()],
regNumberInEncoding[i.rn.realReg()],
uint32(i.u2),
@@ -404,7 +404,7 @@ func (i *instruction) encode(m *machine) {
case atomicLoad:
c.Emit4Bytes(encodeAtomicLoadStore(
regNumberInEncoding[i.rn.realReg()],
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
uint32(i.u2),
1,
))
@@ -810,7 +810,7 @@ func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32
// encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeCnvBetweenFloatInt(i *instruction) uint32 {
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
rn := regNumberInEncoding[i.rn.realReg()]
var opcode uint32
@@ -822,8 +822,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 {
rmode = 0b00
signed := i.u1 == 1
- src64bit := i.u2 == 1
- dst64bit := i.u3 == 1
+ src64bit := i.u2&1 != 0
+ dst64bit := i.u2&2 != 0
if signed {
opcode = 0b010
} else {
@@ -841,8 +841,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 {
rmode = 0b11
signed := i.u1 == 1
- src64bit := i.u2 == 1
- dst64bit := i.u3 == 1
+ src64bit := i.u2&1 != 0
+ dst64bit := i.u2&2 != 0
if signed {
opcode = 0b000
@@ -1787,13 +1787,13 @@ func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) {
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
//
// "shift" must have been divided by 16 at this point.
-func encodeMoveWideImmediate(opc uint32, rd uint32, imm, shift, _64bit uint64) (ret uint32) {
+func encodeMoveWideImmediate(opc uint32, rd uint32, imm uint64, shift, _64bit uint32) (ret uint32) {
ret = rd
ret |= uint32(imm&0xffff) << 5
- ret |= (uint32(shift)) << 21
+ ret |= (shift) << 21
ret |= 0b100101 << 23
ret |= opc << 29
- ret |= uint32(_64bit) << 31
+ ret |= _64bit << 31
return
}
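The packing above follows the A64 move-wide immediate layout: Rd at bit 0, imm16 at bit 5, hw at bit 21, the fixed 100101 pattern at bit 23, opc at bit 29 and sf at bit 31. A self-contained sanity check of that bit arithmetic, assuming opc=0b10 (MOVZ), register x0 and a zero shift; encodeMoveWide below is a local reimplementation for illustration only:

package main

import "fmt"

// encodeMoveWide reproduces the bit packing of encodeMoveWideImmediate above,
// used here only to check one encoding by hand.
func encodeMoveWide(opc, rd uint32, imm uint64, shift, is64 uint32) uint32 {
	ret := rd
	ret |= uint32(imm&0xffff) << 5
	ret |= shift << 21
	ret |= 0b100101 << 23
	ret |= opc << 29
	ret |= is64 << 31
	return ret
}

func main() {
	// movz x0, #0x1234 (64-bit, no shift): opc=0b10, rd=0.
	fmt.Printf("%#x\n", encodeMoveWide(0b10, 0, 0x1234, 0, 1)) // 0xd2824680
}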
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go
index 698b382d4..6c6824fb0 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go
@@ -284,18 +284,18 @@ func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
- instr.asMOVZ(dst, v, uint64(shift), dst64)
+ instr.asMOVZ(dst, v, uint32(shift), dst64)
m.insert(instr)
}
func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
- instr.asMOVK(dst, v, uint64(shift), dst64)
+ instr.asMOVK(dst, v, uint32(shift), dst64)
m.insert(instr)
}
func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
- instr.asMOVN(dst, v, uint64(shift), dst64)
+ instr.asMOVN(dst, v, uint32(shift), dst64)
m.insert(instr)
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
index 2bb234e8c..048bf3204 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
@@ -52,11 +52,11 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) {
maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32)
m.lowerConstantI32(maxIndexReg, int32(len(targets)-1))
subs := m.allocateInstr()
- subs.asALU(aluOpSubS, operandNR(xzrVReg), indexOperand, operandNR(maxIndexReg), false)
+ subs.asALU(aluOpSubS, xzrVReg, indexOperand, operandNR(maxIndexReg), false)
m.insert(subs)
csel := m.allocateInstr()
adjustedIndex := m.compiler.AllocateVReg(ssa.TypeI32)
- csel.asCSel(operandNR(adjustedIndex), operandNR(maxIndexReg), indexOperand, hs, false)
+ csel.asCSel(adjustedIndex, operandNR(maxIndexReg), indexOperand, hs, false)
m.insert(csel)
brSequence := m.allocateInstr()
@@ -249,7 +249,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
rc := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerSelectVec(rc, rn, rm, rd)
} else {
m.lowerSelect(c, x, y, instr.Return())
@@ -270,7 +270,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x, ctx := instr.Arg2()
result := instr.Return()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
ctxVReg := m.compiler.VRegOf(ctx)
m.lowerFpuToInt(rd, rn, ctxVReg, true, x.Type() == ssa.TypeF64,
result.Type().Bits() == 64, op == ssa.OpcodeFcvtToSintSat)
@@ -278,7 +278,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x, ctx := instr.Arg2()
result := instr.Return()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
ctxVReg := m.compiler.VRegOf(ctx)
m.lowerFpuToInt(rd, rn, ctxVReg, false, x.Type() == ssa.TypeF64,
result.Type().Bits() == 64, op == ssa.OpcodeFcvtToUintSat)
@@ -286,25 +286,25 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x := instr.Arg()
result := instr.Return()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
m.lowerIntToFpu(rd, rn, true, x.Type() == ssa.TypeI64, result.Type().Bits() == 64)
case ssa.OpcodeFcvtFromUint:
x := instr.Arg()
result := instr.Return()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
m.lowerIntToFpu(rd, rn, false, x.Type() == ssa.TypeI64, result.Type().Bits() == 64)
case ssa.OpcodeFdemote:
v := instr.Arg()
rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
cnt := m.allocateInstr()
cnt.asFpuRR(fpuUniOpCvt64To32, rd, rn, false)
m.insert(cnt)
case ssa.OpcodeFpromote:
v := instr.Arg()
rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
cnt := m.allocateInstr()
cnt.asFpuRR(fpuUniOpCvt32To64, rd, rn, true)
m.insert(cnt)
@@ -343,15 +343,15 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
ctxVReg := m.compiler.VRegOf(ctx)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerIDiv(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSdiv)
case ssa.OpcodeSrem, ssa.OpcodeUrem:
x, y, ctx := instr.Arg3()
ctxVReg := m.compiler.VRegOf(ctx)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
- m.lowerIRem(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem)
+ rd := m.compiler.VRegOf(instr.Return())
+ m.lowerIRem(ctxVReg, rd, rn.nr(), rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem)
case ssa.OpcodeVconst:
result := m.compiler.VRegOf(instr.Return())
lo, hi := instr.VconstData()
@@ -362,7 +362,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x := instr.Arg()
ins := m.allocateInstr()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
ins.asVecMisc(vecOpNot, rd, rn, vecArrangement16B)
m.insert(ins)
case ssa.OpcodeVbxor:
@@ -382,12 +382,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
creg := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone)
- tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmp := m.compiler.AllocateVReg(ssa.TypeV128)
// creg is overwritten by BSL, so we need to move it to the result register before the instruction
// in case when it is used somewhere else.
mov := m.allocateInstr()
- mov.asFpuMov128(tmp.nr(), creg.nr())
+ mov.asFpuMov128(tmp, creg.nr())
m.insert(mov)
ins := m.allocateInstr()
@@ -396,7 +396,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
mov2 := m.allocateInstr()
rd := m.compiler.VRegOf(instr.Return())
- mov2.asFpuMov128(rd, tmp.nr())
+ mov2.asFpuMov128(rd, tmp)
m.insert(mov2)
case ssa.OpcodeVanyTrue, ssa.OpcodeVallTrue:
x, lane := instr.ArgWithLane()
@@ -405,12 +405,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
arr = ssaLaneToArrangement(lane)
}
rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerVcheckTrue(op, rm, rd, arr)
case ssa.OpcodeVhighBits:
x, lane := instr.ArgWithLane()
rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
arr := ssaLaneToArrangement(lane)
m.lowerVhighBits(rm, rd, arr)
case ssa.OpcodeVIadd:
@@ -441,9 +441,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
panic("unsupported lane " + lane.String())
}
- widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo, vv, operandShiftImm(0), loArr)
- widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi, vv, operandShiftImm(0), hiArr)
- addp := m.allocateInstr().asVecRRR(vecOpAddp, operandNR(m.compiler.VRegOf(instr.Return())), tmpLo, tmpHi, dstArr)
+ widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo.nr(), vv, operandShiftImm(0), loArr)
+ widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi.nr(), vv, operandShiftImm(0), hiArr)
+ addp := m.allocateInstr().asVecRRR(vecOpAddp, m.compiler.VRegOf(instr.Return()), tmpLo, tmpHi, dstArr)
m.insert(widenLo)
m.insert(widenHi)
m.insert(addp)
@@ -493,7 +493,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
arr := ssaLaneToArrangement(lane)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerVIMul(rd, rn, rm, arr)
case ssa.OpcodeVIabs:
m.lowerVecMisc(vecOpAbs, instr)
@@ -507,7 +507,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
arr := ssaLaneToArrangement(lane)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerVShift(op, rd, rn, rm, arr)
case ssa.OpcodeVSqrt:
m.lowerVecMisc(vecOpFsqrt, instr)
@@ -547,18 +547,18 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x, lane := instr.ArgWithLane()
arr := ssaLaneToArrangement(lane)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerVfpuToInt(rd, rn, arr, op == ssa.OpcodeVFcvtToSintSat)
case ssa.OpcodeVFcvtFromSint, ssa.OpcodeVFcvtFromUint:
x, lane := instr.ArgWithLane()
arr := ssaLaneToArrangement(lane)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerVfpuFromInt(rd, rn, arr, op == ssa.OpcodeVFcvtFromSint)
case ssa.OpcodeSwidenLow, ssa.OpcodeUwidenLow:
x, lane := instr.ArgWithLane()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
var arr vecArrangement
switch lane {
@@ -580,7 +580,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
case ssa.OpcodeSwidenHigh, ssa.OpcodeUwidenHigh:
x, lane := instr.ArgWithLane()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
arr := ssaLaneToArrangement(lane)
@@ -607,9 +607,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
}
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
- tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmp := m.compiler.AllocateVReg(ssa.TypeV128)
loQxtn := m.allocateInstr()
hiQxtn := m.allocateInstr()
@@ -628,7 +628,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
m.insert(hiQxtn)
mov := m.allocateInstr()
- mov.asFpuMov128(rd.nr(), tmp.nr())
+ mov.asFpuMov128(rd, tmp)
m.insert(mov)
case ssa.OpcodeFvpromoteLow:
x, lane := instr.ArgWithLane()
@@ -637,7 +637,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
}
ins := m.allocateInstr()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
ins.asVecMisc(vecOpFcvtl, rd, rn, vecArrangement2S)
m.insert(ins)
case ssa.OpcodeFvdemote:
@@ -647,14 +647,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
}
ins := m.allocateInstr()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
ins.asVecMisc(vecOpFcvtn, rd, rn, vecArrangement2S)
m.insert(ins)
case ssa.OpcodeExtractlane:
x, index, signed, lane := instr.ExtractlaneData()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
mov := m.allocateInstr()
switch lane {
@@ -680,12 +680,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x, y, index, lane := instr.InsertlaneData()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
- tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ rd := m.compiler.VRegOf(instr.Return())
+ tmpReg := m.compiler.AllocateVReg(ssa.TypeV128)
// Initially mov rn to tmp.
mov1 := m.allocateInstr()
- mov1.asFpuMov128(tmpReg.nr(), rn.nr())
+ mov1.asFpuMov128(tmpReg, rn.nr())
m.insert(mov1)
// movToVec and vecMovElement do not clear the remaining bits to zero,
@@ -709,14 +709,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
// Finally mov tmp to rd.
mov3 := m.allocateInstr()
- mov3.asFpuMov128(rd.nr(), tmpReg.nr())
+ mov3.asFpuMov128(rd, tmpReg)
m.insert(mov3)
case ssa.OpcodeSwizzle:
x, y, lane := instr.Arg2WithLane()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
arr := ssaLaneToArrangement(lane)
@@ -729,14 +729,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x, y, lane1, lane2 := instr.ShuffleData()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerShuffle(rd, rn, rm, lane1, lane2)
case ssa.OpcodeSplat:
x, lane := instr.ArgWithLane()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
dup := m.allocateInstr()
switch lane {
@@ -760,12 +760,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
xx, yy := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone),
m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
tmp, tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)), operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
- m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp, xx, yy, vecArrangement8H))
- m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2, xx, yy, vecArrangement8H))
- m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp, tmp, tmp2, vecArrangement4S))
+ m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp.nr(), xx, yy, vecArrangement8H))
+ m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2.nr(), xx, yy, vecArrangement8H))
+ m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp2, vecArrangement4S))
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
- m.insert(m.allocateInstr().asFpuMov128(rd.nr(), tmp.nr()))
+ rd := m.compiler.VRegOf(instr.Return())
+ m.insert(m.allocateInstr().asFpuMov128(rd, tmp.nr()))
case ssa.OpcodeLoadSplat:
ptr, offset, lane := instr.LoadSplatData()
@@ -794,7 +794,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
m.executableContext.FlushPendingInstructions()
}
-func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) {
+func (m *machine) lowerShuffle(rd regalloc.VReg, rn, rm operand, lane1, lane2 uint64) {
// `tbl2` requires 2 consecutive registers, so we arbitrarily pick v29, v30.
vReg, wReg := v29VReg, v30VReg
@@ -822,7 +822,7 @@ func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) {
m.insert(tbl2)
}
-func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangement) {
+func (m *machine) lowerVShift(op ssa.Opcode, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
var modulo byte
switch arr {
case vecArrangement16B:
@@ -847,13 +847,13 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem
if op != ssa.OpcodeVIshl {
// Negate the amount to make this as right shift.
neg := m.allocateInstr()
- neg.asALU(aluOpSub, rtmp, operandNR(xzrVReg), rtmp, true)
+ neg.asALU(aluOpSub, rtmp.nr(), operandNR(xzrVReg), rtmp, true)
m.insert(neg)
}
// Copy the shift amount into a vector register as sshl/ushl requires it to be there.
dup := m.allocateInstr()
- dup.asVecDup(vtmp, rtmp, arr)
+ dup.asVecDup(vtmp.nr(), rtmp, arr)
m.insert(dup)
if op == ssa.OpcodeVIshl || op == ssa.OpcodeVSshr {
@@ -867,7 +867,7 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem
}
}
-func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangement) {
+func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm operand, rd regalloc.VReg, arr vecArrangement) {
tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
// Special case VallTrue for i64x2.
@@ -878,11 +878,11 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
// cset dst, eq
ins := m.allocateInstr()
- ins.asVecMisc(vecOpCmeq0, tmp, rm, vecArrangement2D)
+ ins.asVecMisc(vecOpCmeq0, tmp.nr(), rm, vecArrangement2D)
m.insert(ins)
addp := m.allocateInstr()
- addp.asVecRRR(vecOpAddp, tmp, tmp, tmp, vecArrangement2D)
+ addp.asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp, vecArrangement2D)
m.insert(addp)
fcmp := m.allocateInstr()
@@ -890,7 +890,7 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
m.insert(fcmp)
cset := m.allocateInstr()
- cset.asCSet(rd.nr(), false, eq)
+ cset.asCSet(rd, false, eq)
m.insert(cset)
return
@@ -900,10 +900,10 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
ins := m.allocateInstr()
if op == ssa.OpcodeVanyTrue {
// umaxp v4?.16b, v2?.16b, v2?.16b
- ins.asVecRRR(vecOpUmaxp, tmp, rm, rm, vecArrangement16B)
+ ins.asVecRRR(vecOpUmaxp, tmp.nr(), rm, rm, vecArrangement16B)
} else {
// uminv d4?, v2?.4s
- ins.asVecLanes(vecOpUminv, tmp, rm, arr)
+ ins.asVecLanes(vecOpUminv, tmp.nr(), rm, arr)
}
m.insert(ins)
@@ -917,15 +917,15 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
m.insert(movv)
fc := m.allocateInstr()
- fc.asCCmpImm(rd, uint64(0), al, 0, true)
+ fc.asCCmpImm(operandNR(rd), uint64(0), al, 0, true)
m.insert(fc)
cset := m.allocateInstr()
- cset.asCSet(rd.nr(), false, ne)
+ cset.asCSet(rd, false, ne)
m.insert(cset)
}
-func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
+func (m *machine) lowerVhighBits(rm operand, rd regalloc.VReg, arr vecArrangement) {
r0 := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
v0 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
v1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
@@ -947,7 +947,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// Right arithmetic shift on the original vector and store the result into v1. So we have:
// v1[i] = 0xff if vi<0, 0 otherwise.
sshr := m.allocateInstr()
- sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(7), vecArrangement16B)
+ sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(7), vecArrangement16B)
m.insert(sshr)
// Load the bit mask into r0.
@@ -958,7 +958,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// dup r0 to v0.
dup := m.allocateInstr()
- dup.asVecDup(v0, r0, vecArrangement2D)
+ dup.asVecDup(v0.nr(), r0, vecArrangement2D)
m.insert(dup)
// Lane-wise logical AND with the bit mask, meaning that we have
@@ -967,23 +967,23 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// Below, we use the following notation:
// wi := (1 << i) if vi<0, 0 otherwise.
and := m.allocateInstr()
- and.asVecRRR(vecOpAnd, v1, v1, v0, vecArrangement16B)
+ and.asVecRRR(vecOpAnd, v1.nr(), v1, v0, vecArrangement16B)
m.insert(and)
// Swap the lower and higher 8 byte elements, and write it into v0, meaning that we have
// v0[i] = w(i+8) if i < 8, w(i-8) otherwise.
ext := m.allocateInstr()
- ext.asVecExtract(v0, v1, v1, vecArrangement16B, uint32(8))
+ ext.asVecExtract(v0.nr(), v1, v1, vecArrangement16B, uint32(8))
m.insert(ext)
// v = [w0, w8, ..., w7, w15]
zip1 := m.allocateInstr()
- zip1.asVecPermute(vecOpZip1, v0, v1, v0, vecArrangement16B)
+ zip1.asVecPermute(vecOpZip1, v0.nr(), v1, v0, vecArrangement16B)
m.insert(zip1)
// v.h[0] = w0 + ... + w15
addv := m.allocateInstr()
- addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H)
+ addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H)
m.insert(addv)
// Extract the v.h[0] as the result.
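The comments in this 16B branch describe the usual vector bitmask trick: keep each lane's sign, AND it with a per-lane weight (1 << i), then sum all lanes so v.h[0] ends up holding the mask. A scalar sketch of the same computation (the 8H and 4S branches below follow the same pattern with wider lanes); highBits16B is illustrative only:

package main

import "fmt"

// highBits16B computes what the sshr/and/ext/zip1/addv sequence above produces:
// bit i of the result is set iff lane i of the 16 x i8 vector is negative.
func highBits16B(lanes [16]int8) (mask uint16) {
	for i, v := range lanes {
		if v < 0 {
			mask |= 1 << i // wi := (1 << i) if vi < 0, 0 otherwise
		}
	}
	return
}

func main() {
	var v [16]int8
	v[0], v[3], v[15] = -1, -128, -7
	fmt.Printf("%#x\n", highBits16B(v)) // 0x8009
}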
@@ -1006,7 +1006,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// Right arithmetic shift on the original vector and store the result into v1. So we have:
// v[i] = 0xffff if vi<0, 0 otherwise.
sshr := m.allocateInstr()
- sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(15), vecArrangement8H)
+ sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(15), vecArrangement8H)
m.insert(sshr)
// Load the bit mask into r0.
@@ -1014,26 +1014,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// dup r0 to vector v0.
dup := m.allocateInstr()
- dup.asVecDup(v0, r0, vecArrangement2D)
+ dup.asVecDup(v0.nr(), r0, vecArrangement2D)
m.insert(dup)
lsl := m.allocateInstr()
- lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(4), true)
+ lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(4), true)
m.insert(lsl)
movv := m.allocateInstr()
- movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1))
+ movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1))
m.insert(movv)
// Lane-wise logical AND with the bitmask, meaning that we have
// v[i] = (1 << i) if vi<0, 0 otherwise for i=0..3
// = (1 << (i+4)) if vi<0, 0 otherwise for i=3..7
and := m.allocateInstr()
- and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B)
+ and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B)
m.insert(and)
addv := m.allocateInstr()
- addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H)
+ addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H)
m.insert(addv)
movfv := m.allocateInstr()
@@ -1055,7 +1055,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// Right arithmetic shift on the original vector and store the result into v1. So we have:
// v[i] = 0xffffffff if vi<0, 0 otherwise.
sshr := m.allocateInstr()
- sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(31), vecArrangement4S)
+ sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(31), vecArrangement4S)
m.insert(sshr)
// Load the bit mask into r0.
@@ -1063,26 +1063,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// dup r0 to vector v0.
dup := m.allocateInstr()
- dup.asVecDup(v0, r0, vecArrangement2D)
+ dup.asVecDup(v0.nr(), r0, vecArrangement2D)
m.insert(dup)
lsl := m.allocateInstr()
- lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(2), true)
+ lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(2), true)
m.insert(lsl)
movv := m.allocateInstr()
- movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1))
+ movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1))
m.insert(movv)
// Lane-wise logical AND with the bitmask, meaning that we have
// v[i] = (1 << i) if vi<0, 0 otherwise for i in [0, 1]
// = (1 << (i+4)) if vi<0, 0 otherwise for i in [2, 3]
and := m.allocateInstr()
- and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B)
+ and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B)
m.insert(and)
addv := m.allocateInstr()
- addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement4S)
+ addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement4S)
m.insert(addv)
movfv := m.allocateInstr()
@@ -1102,21 +1102,21 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// Move the higher 64-bit int into r0.
movv1 := m.allocateInstr()
- movv1.asMovFromVec(r0, rm, vecArrangementD, vecIndex(1), false)
+ movv1.asMovFromVec(r0.nr(), rm, vecArrangementD, vecIndex(1), false)
m.insert(movv1)
// Move the sign bit into the least significant bit.
lsr1 := m.allocateInstr()
- lsr1.asALUShift(aluOpLsr, r0, r0, operandShiftImm(63), true)
+ lsr1.asALUShift(aluOpLsr, r0.nr(), r0, operandShiftImm(63), true)
m.insert(lsr1)
lsr2 := m.allocateInstr()
- lsr2.asALUShift(aluOpLsr, rd, rd, operandShiftImm(63), true)
+ lsr2.asALUShift(aluOpLsr, rd, operandNR(rd), operandShiftImm(63), true)
m.insert(lsr2)
// rd = (r0<<1) | rd
lsl := m.allocateInstr()
- lsl.asALU(aluOpAdd, rd, rd, operandSR(r0.nr(), 1, shiftOpLSL), false)
+ lsl.asALU(aluOpAdd, rd, operandNR(rd), operandSR(r0.nr(), 1, shiftOpLSL), false)
m.insert(lsl)
default:
panic("Unsupported " + arr.String())
@@ -1128,7 +1128,7 @@ func (m *machine) lowerVecMisc(op vecOp, instr *ssa.Instruction) {
arr := ssaLaneToArrangement(lane)
ins := m.allocateInstr()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
ins.asVecMisc(op, rd, rn, arr)
m.insert(ins)
}
@@ -1137,22 +1137,22 @@ func (m *machine) lowerVecRRR(op vecOp, x, y, ret ssa.Value, arr vecArrangement)
ins := m.allocateInstr()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(ret))
+ rd := m.compiler.VRegOf(ret)
ins.asVecRRR(op, rd, rn, rm, arr)
m.insert(ins)
}
-func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
+func (m *machine) lowerVIMul(rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
if arr != vecArrangement2D {
mul := m.allocateInstr()
mul.asVecRRR(vecOpMul, rd, rn, rm, arr)
m.insert(mul)
} else {
- tmp1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
- tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
- tmp3 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmp1 := m.compiler.AllocateVReg(ssa.TypeV128)
+ tmp2 := m.compiler.AllocateVReg(ssa.TypeV128)
+ tmp3 := m.compiler.AllocateVReg(ssa.TypeV128)
- tmpRes := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmpRes := m.compiler.AllocateVReg(ssa.TypeV128)
// Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696
rev64 := m.allocateInstr()
@@ -1160,7 +1160,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
m.insert(rev64)
mul := m.allocateInstr()
- mul.asVecRRR(vecOpMul, tmp2, tmp2, rn, vecArrangement4S)
+ mul.asVecRRR(vecOpMul, tmp2, operandNR(tmp2), rn, vecArrangement4S)
m.insert(mul)
xtn1 := m.allocateInstr()
@@ -1168,7 +1168,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
m.insert(xtn1)
addp := m.allocateInstr()
- addp.asVecRRR(vecOpAddp, tmp2, tmp2, tmp2, vecArrangement4S)
+ addp.asVecRRR(vecOpAddp, tmp2, operandNR(tmp2), operandNR(tmp2), vecArrangement4S)
m.insert(addp)
xtn2 := m.allocateInstr()
@@ -1179,15 +1179,15 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
// In short, in UMLAL instruction, the result register is also one of the source register, and
// the value on the result register is significant.
shll := m.allocateInstr()
- shll.asVecMisc(vecOpShll, tmpRes, tmp2, vecArrangement2S)
+ shll.asVecMisc(vecOpShll, tmpRes, operandNR(tmp2), vecArrangement2S)
m.insert(shll)
umlal := m.allocateInstr()
- umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, tmp3, tmp1, vecArrangement2S)
+ umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, operandNR(tmp3), operandNR(tmp1), vecArrangement2S)
m.insert(umlal)
mov := m.allocateInstr()
- mov.asFpuMov128(rd.nr(), tmpRes.nr())
+ mov.asFpuMov128(rd, tmpRes)
m.insert(mov)
}
}
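For the 2D arrangement there is no vector MUL, so the code above builds each 64-bit product from 32-bit halves following the V8 sequence referenced in the comment (REV64/MUL/XTN/ADDP/SHLL/UMLAL). A scalar sketch of the per-lane identity those instructions implement; mulLow64 is illustrative, not wazero code:

package main

import "fmt"

// mulLow64 computes the low 64 bits of a*b from 32-bit halves, the identity
// behind the vector sequence above: lo64(a*b) = aLo*bLo + ((aLo*bHi + aHi*bLo) << 32),
// with everything taken mod 2^64.
func mulLow64(a, b uint64) uint64 {
	aLo, aHi := a&0xffffffff, a>>32
	bLo, bHi := b&0xffffffff, b>>32
	cross := (aLo*bHi + aHi*bLo) << 32 // wrapping is fine: only the low 32 bits of the sum survive the shift
	return aLo*bLo + cross
}

func main() {
	a, b := uint64(0x1234567890abcdef), uint64(0xfedcba0987654321)
	fmt.Println(mulLow64(a, b) == a*b) // true
}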
@@ -1203,7 +1203,7 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) {
// BSL modifies the destination register, so we need to use a temporary register so that
// the actual definition of the destination register happens *after* the BSL instruction.
// That way, we can force the spill instruction to be inserted after the BSL instruction.
- tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmp := m.compiler.AllocateVReg(ssa.TypeV128)
fcmgt := m.allocateInstr()
if max {
@@ -1220,17 +1220,17 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) {
res := operandNR(m.compiler.VRegOf(instr.Return()))
mov2 := m.allocateInstr()
- mov2.asFpuMov128(res.nr(), tmp.nr())
+ mov2.asFpuMov128(res.nr(), tmp)
m.insert(mov2)
}
-func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) {
+func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn regalloc.VReg, rm operand, _64bit, signed bool) {
div := m.allocateInstr()
if signed {
- div.asALU(aluOpSDiv, rd, rn, rm, _64bit)
+ div.asALU(aluOpSDiv, rd, operandNR(rn), rm, _64bit)
} else {
- div.asALU(aluOpUDiv, rd, rn, rm, _64bit)
+ div.asALU(aluOpUDiv, rd, operandNR(rn), rm, _64bit)
}
m.insert(div)
@@ -1239,11 +1239,11 @@ func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi
// rd = rn-rd*rm by MSUB instruction.
msub := m.allocateInstr()
- msub.asALURRRR(aluOpMSub, rd, rd, rm, rn, _64bit)
+ msub.asALURRRR(aluOpMSub, rd, operandNR(rd), rm, rn, _64bit)
m.insert(msub)
}
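As the comment above says, the remainder comes out of MSUB as rd = rn - rd*rm, with rd already holding the quotient from the preceding division. The same identity in plain Go, as a sketch for reference (remViaMsub is not a wazero helper):

package main

import "fmt"

// remViaMsub mirrors the div+msub pair above: compute the quotient first, then
// remainder = dividend - quotient*divisor, which is exactly what MSUB produces.
func remViaMsub(rn, rm int64) int64 {
	quot := rn / rm     // sdiv rd, rn, rm (truncates toward zero, like SDIV)
	return rn - quot*rm // msub rd, rd, rm, rn
}

func main() {
	fmt.Println(remViaMsub(-7, 3), -7%3) // -1 -1
}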
-func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) {
+func (m *machine) lowerIDiv(execCtxVReg, rd regalloc.VReg, rn, rm operand, _64bit, signed bool) {
div := m.allocateInstr()
if signed {
@@ -1260,7 +1260,7 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi
// We need to check the signed overflow which happens iff "math.MinInt{32,64} / -1"
minusOneCheck := m.allocateInstr()
// Sets eq condition if rm == -1.
- minusOneCheck.asALU(aluOpAddS, operandNR(xzrVReg), rm, operandImm12(1, 0), _64bit)
+ minusOneCheck.asALU(aluOpAddS, xzrVReg, rm, operandImm12(1, 0), _64bit)
m.insert(minusOneCheck)
ccmp := m.allocateInstr()
@@ -1290,20 +1290,20 @@ func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, c
func (m *machine) lowerFcopysign(x, y, ret ssa.Value) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- var tmpI, tmpF operand
+ var tmpI, tmpF regalloc.VReg
_64 := x.Type() == ssa.TypeF64
if _64 {
- tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
- tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
+ tmpF = m.compiler.AllocateVReg(ssa.TypeF64)
+ tmpI = m.compiler.AllocateVReg(ssa.TypeI64)
} else {
- tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF32))
- tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
+ tmpF = m.compiler.AllocateVReg(ssa.TypeF32)
+ tmpI = m.compiler.AllocateVReg(ssa.TypeI32)
}
rd := m.compiler.VRegOf(ret)
- m.lowerFcopysignImpl(operandNR(rd), rn, rm, tmpI, tmpF, _64)
+ m.lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF, _64)
}
-func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool) {
+func (m *machine) lowerFcopysignImpl(rd regalloc.VReg, rn, rm operand, tmpI, tmpF regalloc.VReg, _64bit bool) {
// This is exactly the same code emitted by GCC for "__builtin_copysign":
//
// mov x0, -9223372036854775808
@@ -1313,26 +1313,26 @@ func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool
setMSB := m.allocateInstr()
if _64bit {
- m.lowerConstantI64(tmpI.nr(), math.MinInt64)
- setMSB.asMovToVec(tmpF, tmpI, vecArrangementD, vecIndex(0))
+ m.lowerConstantI64(tmpI, math.MinInt64)
+ setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementD, vecIndex(0))
} else {
- m.lowerConstantI32(tmpI.nr(), math.MinInt32)
- setMSB.asMovToVec(tmpF, tmpI, vecArrangementS, vecIndex(0))
+ m.lowerConstantI32(tmpI, math.MinInt32)
+ setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementS, vecIndex(0))
}
m.insert(setMSB)
- tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
+ tmpReg := m.compiler.AllocateVReg(ssa.TypeF64)
mov := m.allocateInstr()
- mov.asFpuMov64(tmpReg.nr(), rn.nr())
+ mov.asFpuMov64(tmpReg, rn.nr())
m.insert(mov)
vbit := m.allocateInstr()
- vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, tmpF, vecArrangement8B)
+ vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, operandNR(tmpF), vecArrangement8B)
m.insert(vbit)
movDst := m.allocateInstr()
- movDst.asFpuMov64(rd.nr(), tmpReg.nr())
+ movDst.asFpuMov64(rd, tmpReg)
m.insert(movDst)
}
@@ -1340,7 +1340,7 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) {
v, dstType := instr.BitcastData()
srcType := v.Type()
rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
srcInt := srcType.IsInt()
dstInt := dstType.IsInt()
switch {
@@ -1371,14 +1371,14 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) {
func (m *machine) lowerFpuUniOp(op fpuUniOp, in, out ssa.Value) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(in), extModeNone)
- rd := operandNR(m.compiler.VRegOf(out))
+ rd := m.compiler.VRegOf(out)
neg := m.allocateInstr()
neg.asFpuRR(op, rd, rn, in.Type().Bits() == 64)
m.insert(neg)
}
-func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) {
+func (m *machine) lowerFpuToInt(rd regalloc.VReg, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) {
if !nonTrapping {
// First of all, we have to clear the FPU flags.
flagClear := m.allocateInstr()
@@ -1405,7 +1405,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
// Check if the conversion was undefined by comparing the status with 1.
// See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register
alu := m.allocateInstr()
- alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true)
+ alu.asALU(aluOpSubS, xzrVReg, operandNR(tmpReg), operandImm12(1, 0), true)
m.insert(alu)
// If it is not undefined, we can return the result.
@@ -1429,7 +1429,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
}
}
-func (m *machine) lowerIntToFpu(rd, rn operand, signed, src64bit, dst64bit bool) {
+func (m *machine) lowerIntToFpu(rd regalloc.VReg, rn operand, signed, src64bit, dst64bit bool) {
cvt := m.allocateInstr()
cvt.asIntToFpu(rd, rn, signed, src64bit, dst64bit)
m.insert(cvt)
@@ -1456,7 +1456,7 @@ func (m *machine) lowerFpuBinOp(si *ssa.Instruction) {
xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)
rn := m.getOperand_NR(xDef, extModeNone)
rm := m.getOperand_NR(yDef, extModeNone)
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
instr.asFpuRRR(op, rd, rn, rm, x.Type().Bits() == 64)
m.insert(instr)
}
@@ -1482,7 +1482,7 @@ func (m *machine) lowerSubOrAdd(si *ssa.Instruction, add bool) {
case !add && yNegated: // rn+rm = x-(-y) = x-y
aop = aluOpAdd
}
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
alu := m.allocateInstr()
alu.asALU(aop, rd, rn, rm, x.Type().Bits() == 64)
m.insert(alu)
@@ -1527,7 +1527,7 @@ func (m *machine) lowerIcmp(si *ssa.Instruction) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext)
rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), ext)
alu := m.allocateInstr()
- alu.asALU(aluOpSubS, operandNR(xzrVReg), rn, rm, in64bit)
+ alu.asALU(aluOpSubS, xzrVReg, rn, rm, in64bit)
m.insert(alu)
cset := m.allocateInstr()
@@ -1542,7 +1542,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
switch flag {
case eq:
@@ -1554,7 +1554,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) {
cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr)
m.insert(cmp)
not := m.allocateInstr()
- not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B)
+ not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B)
m.insert(not)
case ge:
cmp := m.allocateInstr()
@@ -1598,7 +1598,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
switch flag {
case eq:
@@ -1610,7 +1610,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {
cmp.asVecRRR(vecOpFcmeq, rd, rn, rm, arr)
m.insert(cmp)
not := m.allocateInstr()
- not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B)
+ not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B)
m.insert(not)
case ge:
cmp := m.allocateInstr()
@@ -1631,7 +1631,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {
}
}
-func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool) {
+func (m *machine) lowerVfpuToInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) {
cvt := m.allocateInstr()
if signed {
cvt.asVecMisc(vecOpFcvtzs, rd, rn, arr)
@@ -1643,15 +1643,15 @@ func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool
if arr == vecArrangement2D {
narrow := m.allocateInstr()
if signed {
- narrow.asVecMisc(vecOpSqxtn, rd, rd, vecArrangement2S)
+ narrow.asVecMisc(vecOpSqxtn, rd, operandNR(rd), vecArrangement2S)
} else {
- narrow.asVecMisc(vecOpUqxtn, rd, rd, vecArrangement2S)
+ narrow.asVecMisc(vecOpUqxtn, rd, operandNR(rd), vecArrangement2S)
}
m.insert(narrow)
}
}
-func (m *machine) lowerVfpuFromInt(rd, rn operand, arr vecArrangement, signed bool) {
+func (m *machine) lowerVfpuFromInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) {
cvt := m.allocateInstr()
if signed {
cvt.asVecMisc(vecOpScvtf, rd, rn, arr)
@@ -1665,7 +1665,7 @@ func (m *machine) lowerShifts(si *ssa.Instruction, ext extMode, aluOp aluOp) {
x, amount := si.Arg2()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext)
rm := m.getOperand_ShiftImm_NR(m.compiler.ValueDefinition(amount), ext, x.Type().Bits())
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
alu := m.allocateInstr()
alu.asALUShift(aluOp, rd, rn, rm, x.Type().Bits() == 64)
@@ -1678,11 +1678,11 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult
xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)
rn := m.getOperand_NR(xDef, extModeNone)
- var rd operand
+ var rd regalloc.VReg
if ignoreResult {
- rd = operandNR(xzrVReg)
+ rd = xzrVReg
} else {
- rd = operandNR(m.compiler.VRegOf(si.Return()))
+ rd = m.compiler.VRegOf(si.Return())
}
_64 := x.Type().Bits() == 64
@@ -1691,7 +1691,7 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult
c := instr.ConstantVal()
if isBitMaskImmediate(c, _64) {
// Constant bit wise operations can be lowered to a single instruction.
- alu.asALUBitmaskImm(op, rd.nr(), rn.nr(), c, _64)
+ alu.asALUBitmaskImm(op, rd, rn.nr(), c, _64)
m.insert(alu)
return
}
@@ -1709,25 +1709,25 @@ func (m *machine) lowerRotl(si *ssa.Instruction) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- var tmp operand
+ var tmp regalloc.VReg
if _64 {
- tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
+ tmp = m.compiler.AllocateVReg(ssa.TypeI64)
} else {
- tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
+ tmp = m.compiler.AllocateVReg(ssa.TypeI32)
}
- rd := operandNR(m.compiler.VRegOf(r))
+ rd := m.compiler.VRegOf(r)
// Encode rotl as neg + rotr: neg is a sub against the zero-reg.
m.lowerRotlImpl(rd, rn, rm, tmp, _64)
}
-func (m *machine) lowerRotlImpl(rd, rn, rm, tmp operand, is64bit bool) {
+func (m *machine) lowerRotlImpl(rd regalloc.VReg, rn, rm operand, tmp regalloc.VReg, is64bit bool) {
// Encode rotl as neg + rotr: neg is a sub against the zero-reg.
neg := m.allocateInstr()
neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rm, is64bit)
m.insert(neg)
alu := m.allocateInstr()
- alu.asALU(aluOpRotR, rd, rn, tmp, is64bit)
+ alu.asALU(aluOpRotR, rd, rn, operandNR(tmp), is64bit)
m.insert(alu)
}
@@ -1737,7 +1737,7 @@ func (m *machine) lowerRotr(si *ssa.Instruction) {
xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)
rn := m.getOperand_NR(xDef, extModeNone)
rm := m.getOperand_NR(yDef, extModeNone)
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
alu := m.allocateInstr()
alu.asALU(aluOpRotR, rd, rn, rm, si.Return().Type().Bits() == 64)
@@ -1797,7 +1797,7 @@ func (m *machine) lowerImul(x, y, result ssa.Value) {
// TODO: if this comes before Add/Sub, we could merge it by putting it into the place of xzrVReg.
mul := m.allocateInstr()
- mul.asALURRRR(aluOpMAdd, operandNR(rd), rn, rm, operandNR(xzrVReg), x.Type().Bits() == 64)
+ mul.asALURRRR(aluOpMAdd, rd, rn, rm, xzrVReg, x.Type().Bits() == 64)
m.insert(mul)
}
@@ -1849,22 +1849,22 @@ func (m *machine) lowerPopcnt(x, result ssa.Value) {
// mov x5, v0.d[0] ;; finally we mov the result back to a GPR
//
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rf1 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
ins := m.allocateInstr()
- ins.asMovToVec(rf1, rn, vecArrangementD, vecIndex(0))
+ ins.asMovToVec(rf1.nr(), rn, vecArrangementD, vecIndex(0))
m.insert(ins)
rf2 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
cnt := m.allocateInstr()
- cnt.asVecMisc(vecOpCnt, rf2, rf1, vecArrangement16B)
+ cnt.asVecMisc(vecOpCnt, rf2.nr(), rf1, vecArrangement16B)
m.insert(cnt)
rf3 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
uaddlv := m.allocateInstr()
- uaddlv.asVecLanes(vecOpUaddlv, rf3, rf2, vecArrangement8B)
+ uaddlv.asVecLanes(vecOpUaddlv, rf3.nr(), rf2, vecArrangement8B)
m.insert(uaddlv)
mov := m.allocateInstr()
@@ -1879,32 +1879,35 @@ func (m *machine) lowerExitWithCode(execCtxVReg regalloc.VReg, code wazevoapi.Ex
loadExitCodeConst.asMOVZ(tmpReg1, uint64(code), 0, true)
setExitCode := m.allocateInstr()
- setExitCode.asStore(operandNR(tmpReg1),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
- }, 32)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
+ }
+ setExitCode.asStore(operandNR(tmpReg1), mode, 32)
// In order to unwind the stack, we also need to push the current stack pointer:
tmp2 := m.compiler.AllocateVReg(ssa.TypeI64)
movSpToTmp := m.allocateInstr()
movSpToTmp.asMove64(tmp2, spVReg)
strSpToExecCtx := m.allocateInstr()
- strSpToExecCtx.asStore(operandNR(tmp2),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
- }, 64)
+ mode2 := m.amodePool.Allocate()
+ *mode2 = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
+ }
+ strSpToExecCtx.asStore(operandNR(tmp2), mode2, 64)
// Also the address of this exit.
tmp3 := m.compiler.AllocateVReg(ssa.TypeI64)
currentAddrToTmp := m.allocateInstr()
currentAddrToTmp.asAdr(tmp3, 0)
storeCurrentAddrToExecCtx := m.allocateInstr()
- storeCurrentAddrToExecCtx.asStore(operandNR(tmp3),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
- }, 64)
+ mode3 := m.amodePool.Allocate()
+ *mode3 = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
+ }
+ storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), mode3, 64)
exitSeq := m.allocateInstr()
exitSeq.asExitSequence(execCtxVReg)
@@ -1937,7 +1940,7 @@ func (m *machine) lowerIcmpToFlag(x, y ssa.Value, signed bool) {
alu.asALU(
aluOpSubS,
// We don't need the result, just need to set flags.
- operandNR(xzrVReg),
+ xzrVReg,
rn,
rm,
x.Type().Bits() == 64,
@@ -2012,7 +2015,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
alu.asALU(
aluOpSubS,
// We don't need the result, just need to set flags.
- operandNR(xzrVReg),
+ xzrVReg,
rn,
operandNR(xzrVReg),
c.Type().Bits() == 64,
@@ -2024,7 +2027,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
switch x.Type() {
case ssa.TypeI32, ssa.TypeI64:
// csel rd, rn, rm, cc
@@ -2041,10 +2044,10 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
}
}
-func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
+func (m *machine) lowerSelectVec(rc, rn, rm operand, rd regalloc.VReg) {
// First check if `rc` is zero or not.
checkZero := m.allocateInstr()
- checkZero.asALU(aluOpSubS, operandNR(xzrVReg), rc, operandNR(xzrVReg), false)
+ checkZero.asALU(aluOpSubS, xzrVReg, rc, operandNR(xzrVReg), false)
m.insert(checkZero)
// Then use CSETM to set all bits to one if `rc` is zero.
@@ -2054,7 +2057,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
m.insert(cset)
// Then move the bits to the result vector register.
- tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmp2 := m.compiler.AllocateVReg(ssa.TypeV128)
dup := m.allocateInstr()
dup.asVecDup(tmp2, operandNR(allOnesOrZero), vecArrangement2D)
m.insert(dup)
@@ -2067,7 +2070,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
// Finally, move the result to the destination register.
mov2 := m.allocateInstr()
- mov2.asFpuMov128(rd.nr(), tmp2.nr())
+ mov2.asFpuMov128(rd, tmp2)
m.insert(mov2)
}
@@ -2099,28 +2102,28 @@ func (m *machine) lowerAtomicRmw(si *ssa.Instruction) {
addr, val := si.Arg2()
addrDef, valDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(val)
rn := m.getOperand_NR(addrDef, extModeNone)
- rt := operandNR(m.compiler.VRegOf(si.Return()))
+ rt := m.compiler.VRegOf(si.Return())
rs := m.getOperand_NR(valDef, extModeNone)
_64 := si.Return().Type().Bits() == 64
- var tmp operand
+ var tmp regalloc.VReg
if _64 {
- tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
+ tmp = m.compiler.AllocateVReg(ssa.TypeI64)
} else {
- tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
+ tmp = m.compiler.AllocateVReg(ssa.TypeI32)
}
- m.lowerAtomicRmwImpl(op, rn, rs, rt, tmp, size, negateArg, flipArg, _64)
+ m.lowerAtomicRmwImpl(op, rn.nr(), rs.nr(), rt, tmp, size, negateArg, flipArg, _64)
}
-func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp operand, size uint64, negateArg, flipArg, dst64bit bool) {
+func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp regalloc.VReg, size uint64, negateArg, flipArg, dst64bit bool) {
switch {
case negateArg:
neg := m.allocateInstr()
- neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rs, dst64bit)
+ neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit)
m.insert(neg)
case flipArg:
flip := m.allocateInstr()
- flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), rs, dst64bit)
+ flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit)
m.insert(flip)
default:
tmp = rs
@@ -2139,32 +2142,32 @@ func (m *machine) lowerAtomicCas(si *ssa.Instruction) {
rn := m.getOperand_NR(addrDef, extModeNone)
rt := m.getOperand_NR(replDef, extModeNone)
rs := m.getOperand_NR(expDef, extModeNone)
- tmp := operandNR(m.compiler.AllocateVReg(si.Return().Type()))
+ tmp := m.compiler.AllocateVReg(si.Return().Type())
_64 := si.Return().Type().Bits() == 64
// rs is overwritten by CAS, so we need to move it to the result register before the instruction
// in case it is used somewhere else.
mov := m.allocateInstr()
if _64 {
- mov.asMove64(tmp.nr(), rs.nr())
+ mov.asMove64(tmp, rs.nr())
} else {
- mov.asMove32(tmp.nr(), rs.nr())
+ mov.asMove32(tmp, rs.nr())
}
m.insert(mov)
- m.lowerAtomicCasImpl(rn, tmp, rt, size)
+ m.lowerAtomicCasImpl(rn.nr(), tmp, rt.nr(), size)
mov2 := m.allocateInstr()
rd := m.compiler.VRegOf(si.Return())
if _64 {
- mov2.asMove64(rd, tmp.nr())
+ mov2.asMove64(rd, tmp)
} else {
- mov2.asMove32(rd, tmp.nr())
+ mov2.asMove32(rd, tmp)
}
m.insert(mov2)
}
-func (m *machine) lowerAtomicCasImpl(rn, rs, rt operand, size uint64) {
+func (m *machine) lowerAtomicCasImpl(rn, rs, rt regalloc.VReg, size uint64) {
cas := m.allocateInstr()
cas.asAtomicCas(rn, rs, rt, size)
m.insert(cas)
@@ -2176,12 +2179,12 @@ func (m *machine) lowerAtomicLoad(si *ssa.Instruction) {
addrDef := m.compiler.ValueDefinition(addr)
rn := m.getOperand_NR(addrDef, extModeNone)
- rt := operandNR(m.compiler.VRegOf(si.Return()))
+ rt := m.compiler.VRegOf(si.Return())
- m.lowerAtomicLoadImpl(rn, rt, size)
+ m.lowerAtomicLoadImpl(rn.nr(), rt, size)
}
-func (m *machine) lowerAtomicLoadImpl(rn, rt operand, size uint64) {
+func (m *machine) lowerAtomicLoadImpl(rn, rt regalloc.VReg, size uint64) {
ld := m.allocateInstr()
ld.asAtomicLoad(rn, rt, size)
m.insert(ld)
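
The recurring change through lower_instr.go above is a narrowing of destination parameters: where the old code passed the destination as an operand, the new code passes a bare regalloc.VReg and wraps a register with operandNR only where a genuine source operand is required. A minimal sketch of that shape, using toy register/operand types rather than wazero's real ones:

package main

import "fmt"

// Toy stand-ins for the backend's register and operand types; the real wazero
// definitions are richer, so this only illustrates the shape of the refactor.
type vReg uint32

type operandKind byte

const (
	operandKindNR operandKind = iota // plain register, no shift/extend/immediate
	operandKindImm12
)

type operand struct {
	kind operandKind
	reg  vReg
	imm  uint64
}

func operandNR(r vReg) operand { return operand{kind: operandKindNR, reg: r} }

// Old shape: the destination was an operand even though only a plain register is legal there.
func emitDivOld(rd, rn, rm operand) string {
	return fmt.Sprintf("sdiv x%d, x%d, x%d", rd.reg, rn.reg, rm.reg)
}

// New shape: the destination (and pure register sources) are bare vRegs;
// only true source operands keep the wrapper.
func emitDivNew(rd, rn vReg, rm operand) string {
	return fmt.Sprintf("sdiv x%d, x%d, x%d", rd, rn, rm.reg)
}

func main() {
	rd, rn, rm := vReg(0), vReg(1), vReg(2)
	fmt.Println(emitDivOld(operandNR(rd), operandNR(rn), operandNR(rm)))
	fmt.Println(emitDivNew(rd, rn, operandNR(rm)))
}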
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go
index 4842eaa38..fd0760d72 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go
@@ -24,6 +24,14 @@ type (
addressModeKind byte
)
+func resetAddressMode(a *addressMode) {
+ a.kind = 0
+ a.rn = 0
+ a.rm = 0
+ a.extOp = 0
+ a.imm = 0
+}
+
const (
// addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended,
// and then scaled by bits(type)/8.
@@ -140,15 +148,17 @@ func (a addressMode) format(dstSizeBits byte) (ret string) {
return
}
-func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode {
+func addressModePreOrPostIndex(m *machine, rn regalloc.VReg, imm int64, preIndex bool) *addressMode {
if !offsetFitsInAddressModeKindRegSignedImm9(imm) {
panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm))
}
+ mode := m.amodePool.Allocate()
if preIndex {
- return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
+ *mode = addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
} else {
- return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
}
+ return mode
}
func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool {
@@ -207,9 +217,9 @@ func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret
amode := m.lowerToAddressMode(ptr, offset, size)
load := m.allocateInstr()
if signed {
- load.asSLoad(operandNR(ret), amode, size)
+ load.asSLoad(ret, amode, size)
} else {
- load.asULoad(operandNR(ret), amode, size)
+ load.asULoad(ret, amode, size)
}
m.insert(load)
}
@@ -221,11 +231,11 @@ func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.
load := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
- load.asULoad(operandNR(dst), amode, typ.Bits())
+ load.asULoad(dst, amode, typ.Bits())
case ssa.TypeF32, ssa.TypeF64:
- load.asFpuLoad(operandNR(dst), amode, typ.Bits())
+ load.asFpuLoad(dst, amode, typ.Bits())
case ssa.TypeV128:
- load.asFpuLoad(operandNR(dst), amode, 128)
+ load.asFpuLoad(dst, amode, 128)
default:
panic("TODO")
}
@@ -239,7 +249,7 @@ func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane,
m.lowerConstantI64(offsetReg, int64(offset))
addedBase := m.addReg64ToReg64(base, offsetReg)
- rd := operandNR(m.compiler.VRegOf(ret))
+ rd := m.compiler.VRegOf(ret)
ld1r := m.allocateInstr()
ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane))
@@ -258,7 +268,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) {
}
// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
-func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) {
+func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode *addressMode) {
// TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and
// addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed
// to support more efficient address resolution.
@@ -272,32 +282,33 @@ func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte
// During the construction, this might emit additional instructions.
//
// Extracted as a separate function for easy testing.
-func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) {
+func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode *addressMode) {
+ amode = m.amodePool.Allocate()
switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); {
case a64sExist && a32sExist:
var base regalloc.VReg
base = a64s.Dequeue()
var a32 addend32
a32 = a32s.Dequeue()
- amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
+ *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):
var base regalloc.VReg
base = a64s.Dequeue()
- amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
offset = 0
case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):
var base regalloc.VReg
base = a64s.Dequeue()
- amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
+ *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
offset = 0
case a64sExist:
var base regalloc.VReg
base = a64s.Dequeue()
if !a64s.Empty() {
index := a64s.Dequeue()
- amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
+ *amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
} else {
- amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
}
case a32sExist:
base32 := a32s.Dequeue()
@@ -314,14 +325,14 @@ func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32],
if !a32s.Empty() {
index := a32s.Dequeue()
- amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
+ *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
} else {
- amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
}
default: // Only static offsets.
tmpReg := m.compiler.AllocateVReg(ssa.TypeI64)
m.lowerConstantI64(tmpReg, offset)
- amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
offset = 0
}
@@ -411,13 +422,13 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
if imm12Op, ok := asImm12Operand(uint64(c)); ok {
- alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true)
+ alu.asALU(aluOpAdd, rd, operandNR(r), imm12Op, true)
} else if imm12Op, ok = asImm12Operand(uint64(-c)); ok {
- alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true)
+ alu.asALU(aluOpSub, rd, operandNR(r), imm12Op, true)
} else {
tmp := m.compiler.AllocateVReg(ssa.TypeI64)
m.load64bitConst(c, tmp)
- alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true)
+ alu.asALU(aluOpAdd, rd, operandNR(r), operandNR(tmp), true)
}
m.insert(alu)
return
@@ -426,7 +437,7 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
- alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true)
+ alu.asALU(aluOpAdd, rd, operandNR(rn), operandNR(rm), true)
m.insert(alu)
return
}
@@ -434,7 +445,7 @@ func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
- alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true)
+ alu.asALU(aluOpAdd, rd, operandNR(rn), operandER(rm, ext, 64), true)
m.insert(alu)
return
}
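
lower_mem.go now hands out address modes as pointers taken from a per-machine pool (m.amodePool) and clears them via resetAddressMode, so instructions share *addressMode values that are recycled when the machine is reset. A minimal, self-contained sketch of such a reset-able pool; the real wazevoapi.Pool differs in its internals, and the names here are only illustrative:

package main

import "fmt"

// pool is a toy reset-able allocator in the spirit of amodePool above.
type pool[T any] struct {
	items []*T
	next  int
	reset func(*T)
}

func newPool[T any](reset func(*T)) pool[T] {
	return pool[T]{reset: reset}
}

// Allocate returns a reset *T, reusing storage handed out in earlier rounds.
func (p *pool[T]) Allocate() *T {
	if p.next == len(p.items) {
		p.items = append(p.items, new(T))
	}
	v := p.items[p.next]
	p.next++
	p.reset(v)
	return v
}

// Reset makes all previously allocated items available again without freeing them.
func (p *pool[T]) Reset() { p.next = 0 }

type addressMode struct {
	kind byte
	rn   uint32
	imm  int64
}

func resetAddressMode(a *addressMode) { *a = addressMode{} }

func main() {
	p := newPool[addressMode](resetAddressMode)
	m := p.Allocate()
	*m = addressMode{kind: 1, rn: 31, imm: 16}
	fmt.Printf("%+v\n", *m)
	p.Reset() // per-function compilation boundary: storage is reused, not reallocated
	fmt.Printf("%+v\n", *p.Allocate())
}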
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go
index b435d9ba9..5f584f928 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go
@@ -21,6 +21,8 @@ type (
regAlloc regalloc.Allocator
regAllocFn *backend.RegAllocFunction[*instruction, *machine]
+ amodePool wazevoapi.Pool[addressMode]
+
// addendsWorkQueue is used during address lowering, defined here for reuse.
addendsWorkQueue wazevoapi.Queue[ssa.Value]
addends32 wazevoapi.Queue[addend32]
@@ -105,6 +107,7 @@ func NewBackend() backend.Machine {
spillSlots: make(map[regalloc.VRegID]int64),
executableContext: newExecutableContext(),
regAlloc: regalloc.NewAllocator(regInfo),
+ amodePool: wazevoapi.NewPool[addressMode](resetAddressMode),
}
return m
}
@@ -149,6 +152,7 @@ func (m *machine) Reset() {
m.maxRequiredStackSizeForCalls = 0
m.executableContext.Reset()
m.jmpTableTargets = m.jmpTableTargets[:0]
+ m.amodePool.Reset()
}
// SetCurrentABI implements backend.Machine SetCurrentABI.
@@ -183,9 +187,8 @@ func (m *machine) allocateBrTarget() (nop *instruction, l label) {
l = ectx.AllocateLabel()
nop = m.allocateInstr()
nop.asNop0WithLabel(l)
- pos := ectx.AllocateLabelPosition(l)
+ pos := ectx.GetOrAllocateLabelPosition(l)
pos.Begin, pos.End = nop, nop
- ectx.LabelPositions[l] = pos
return
}
@@ -209,7 +212,7 @@ func (m *machine) allocateNop() *instruction {
}
func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) {
- amode := &i.amode
+ amode := i.getAmode()
switch amode.kind {
case addressModeKindResultStackSpace:
amode.imm += ret0offset
@@ -281,7 +284,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {
switch cur.kind {
case nop0:
l := cur.nop0Label()
- if pos, ok := ectx.LabelPositions[l]; ok {
+ if pos := ectx.LabelPositions[l]; pos != nil {
pos.BinaryOffset = offset + size
}
case condBr:
@@ -428,8 +431,10 @@ func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *
func (m *machine) Format() string {
ectx := m.executableContext
begins := map[*instruction]label{}
- for l, pos := range ectx.LabelPositions {
- begins[pos.Begin] = l
+ for _, pos := range ectx.LabelPositions {
+ if pos != nil {
+ begins[pos.Begin] = pos.L
+ }
}
irBlocks := map[label]ssa.BasicBlockID{}
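
In machine.go the map from label to position becomes a slice indexed directly by the label, with nil marking absent entries and a GetOrAllocateLabelPosition-style helper growing it on demand. A small sketch of that dense-index lookup with toy types (the helper name and fields here are illustrative, not wazero's):

package main

import "fmt"

type label int

type labelPosition struct {
	l            label
	binaryOffset int64
}

// positions is indexed directly by label; missing entries are nil.
type positions []*labelPosition

// getOrAllocate grows the slice as needed and returns the entry for l,
// replacing the old map lookup-or-insert.
func (ps *positions) getOrAllocate(l label) *labelPosition {
	if int(l) >= len(*ps) {
		grown := make(positions, int(l)+1)
		copy(grown, *ps)
		*ps = grown
	}
	if (*ps)[l] == nil {
		(*ps)[l] = &labelPosition{l: l}
	}
	return (*ps)[l]
}

func main() {
	var ps positions
	ps.getOrAllocate(3).binaryOffset = 0x40
	for _, pos := range ps { // iteration now skips nil holes instead of ranging a map
		if pos != nil {
			fmt.Printf("L%d at %#x\n", pos.l, pos.binaryOffset)
		}
	}
}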
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
index 466fac464..d9032f921 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
@@ -70,7 +70,7 @@ func (m *machine) setupPrologue() {
// +-----------------+ <----- SP
// (low address)
//
- _amode := addressModePreOrPostIndex(spVReg,
+ _amode := addressModePreOrPostIndex(m, spVReg,
-16, // stack pointer must be 16-byte aligned.
true, // Decrement before store.
)
@@ -159,7 +159,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc
sizeOfArgRetReg = tmpRegVReg
subSp := m.allocateInstr()
- subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
+ subSp.asALU(aluOpSub, spVReg, operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
cur = linkInstr(cur, subSp)
} else {
sizeOfArgRetReg = xzrVReg
@@ -168,7 +168,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc
// Saves the return address (lr) and the size_of_arg_ret below the SP.
// size_of_arg_ret is used for stack unwinding.
pstr := m.allocateInstr()
- amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */)
+ amode := addressModePreOrPostIndex(m, spVReg, -16, true /* decrement before store */)
pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode)
cur = linkInstr(cur, pstr)
return cur
@@ -182,7 +182,7 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {
} else {
frameSizeReg = xzrVReg
}
- _amode := addressModePreOrPostIndex(spVReg,
+ _amode := addressModePreOrPostIndex(m, spVReg,
-16, // stack pointer must be 16-byte aligned.
true, // Decrement before store.
)
@@ -213,7 +213,7 @@ func (m *machine) postRegAlloc() {
m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
default:
// Removes the redundant copy instruction.
- if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() {
+ if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() {
prev, next := cur.prev, cur.next
// Remove the copy instruction.
prev.next = next
@@ -286,16 +286,16 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
for i := range m.clobberedRegs {
vr := m.clobberedRegs[l-i] // reverse order to restore.
load := m.allocateInstr()
- amode := addressModePreOrPostIndex(spVReg,
+ amode := addressModePreOrPostIndex(m, spVReg,
16, // stack pointer must be 16-byte aligned.
false, // Increment after store.
)
// TODO: pair loads to reduce the number of instructions.
switch regTypeToRegisterSizeInBits(vr.RegType()) {
case 64: // save int reg.
- load.asULoad(operandNR(vr), amode, 64)
+ load.asULoad(vr, amode, 64)
case 128: // save vector reg.
- load.asFpuLoad(operandNR(vr), amode, 128)
+ load.asFpuLoad(vr, amode, 128)
}
cur = linkInstr(cur, load)
}
@@ -317,8 +317,8 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
// SP----> +-----------------+
ldr := m.allocateInstr()
- ldr.asULoad(operandNR(lrVReg),
- addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
+ ldr.asULoad(lrVReg,
+ addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
cur = linkInstr(cur, ldr)
if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
@@ -351,14 +351,14 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi
if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok {
// sub tmp, sp, #requiredStackSize
sub := m.allocateInstr()
- sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true)
+ sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), immm12op, true)
cur = linkInstr(cur, sub)
} else {
// This case, we first load the requiredStackSize into the temporary register,
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
// Then subtract it.
sub := m.allocateInstr()
- sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true)
+ sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), operandNR(tmpRegVReg), true)
cur = linkInstr(cur, sub)
}
@@ -366,16 +366,18 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi
// ldr tmp2, [executionContext #StackBottomPtr]
ldr := m.allocateInstr()
- ldr.asULoad(operandNR(tmp2), addressMode{
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: x0VReg, // execution context is always the first argument.
imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(),
- }, 64)
+ }
+ ldr.asULoad(tmp2, amode, 64)
cur = linkInstr(cur, ldr)
// subs xzr, tmp, tmp2
subs := m.allocateInstr()
- subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true)
+ subs.asALU(aluOpSubS, xzrVReg, operandNR(tmpRegVReg), operandNR(tmp2), true)
cur = linkInstr(cur, subs)
// b.ge #imm
@@ -388,22 +390,25 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi
// First load the requiredStackSize into the temporary register,
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
setRequiredStackSize := m.allocateInstr()
- setRequiredStackSize.asStore(operandNR(tmpRegVReg),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- // Execution context is always the first argument.
- rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
- }, 64)
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ // Execution context is always the first argument.
+ rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
+ }
+ setRequiredStackSize.asStore(operandNR(tmpRegVReg), amode, 64)
cur = linkInstr(cur, setRequiredStackSize)
}
ldrAddress := m.allocateInstr()
- ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{
+ amode2 := m.amodePool.Allocate()
+ *amode2 = addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: x0VReg, // execution context is always the first argument
imm: wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(),
- }, 64)
+ }
+ ldrAddress.asULoad(tmpRegVReg, amode2, 64)
cur = linkInstr(cur, ldrAddress)
// Then jumps to the stack grow call sequence's address, meaning
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
index 1c8793b73..c7eb92cc2 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
@@ -91,7 +91,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft
}
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
- var amode addressMode
+ var amode *addressMode
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
store := m.allocateInstr()
store.asStore(operandNR(v), amode, typ.Bits())
@@ -116,16 +116,16 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af
}
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
- var amode addressMode
+ var amode *addressMode
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
load := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
- load.asULoad(operandNR(v), amode, typ.Bits())
+ load.asULoad(v, amode, typ.Bits())
case ssa.TypeF32, ssa.TypeF64:
- load.asFpuLoad(operandNR(v), amode, typ.Bits())
+ load.asFpuLoad(v, amode, typ.Bits())
case ssa.TypeV128:
- load.asFpuLoad(operandNR(v), amode, 128)
+ load.asFpuLoad(v, amode, 128)
default:
panic("TODO")
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
index 3f36c84e5..655370786 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
@@ -35,7 +35,7 @@ type (
iter int
reversePostOrderBlocks []RegAllocBlock[I, m]
// labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks.
- labelToRegAllocBlockIndex map[Label]int
+ labelToRegAllocBlockIndex [] /* Label to */ int
loopNestingForestRoots []ssa.BasicBlock
}
@@ -56,10 +56,9 @@ type (
// NewRegAllocFunction returns a new RegAllocFunction.
func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] {
return &RegAllocFunction[I, M]{
- m: m,
- ssb: ssb,
- c: c,
- labelToRegAllocBlockIndex: make(map[Label]int),
+ m: m,
+ ssb: ssb,
+ c: c,
}
}
@@ -74,6 +73,9 @@ func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end
end: end,
id: int(sb.ID()),
})
+ if len(f.labelToRegAllocBlockIndex) <= int(l) {
+ f.labelToRegAllocBlockIndex = append(f.labelToRegAllocBlockIndex, make([]int, int(l)-len(f.labelToRegAllocBlockIndex)+1)...)
+ }
f.labelToRegAllocBlockIndex[l] = i
}
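
The backend's regalloc.go applies the same map-to-slice move to labelToRegAllocBlockIndex, growing the slice with a single append of zero values whenever a label exceeds the current length. A tiny sketch of that growth idiom, assuming labels are small dense integers:

package main

import "fmt"

// ensureLen grows s with zero values until index i is addressable, the same
// append(make(...)) idiom the diff uses for labelToRegAllocBlockIndex.
func ensureLen(s []int, i int) []int {
	if len(s) <= i {
		s = append(s, make([]int, i-len(s)+1)...)
	}
	return s
}

func main() {
	var labelToBlock []int
	labelToBlock = ensureLen(labelToBlock, 5)
	labelToBlock[5] = 2
	fmt.Println(labelToBlock) // [0 0 0 0 0 2]
}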
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
index b4450d56f..eacb6a7ef 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
@@ -60,9 +60,8 @@ type (
phiDefInstListPool wazevoapi.Pool[phiDefInstList]
// Followings are re-used during various places.
- blks []Block
- reals []RealReg
- currentOccupants regInUseSet
+ blks []Block
+ reals []RealReg
// Following two fields are updated while iterating the blocks in the reverse postorder.
state state
@@ -755,7 +754,8 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
killSet := a.reals[:0]
// Gather the set of registers that will be used in the current instruction.
- for _, use := range instr.Uses(&a.vs) {
+ uses := instr.Uses(&a.vs)
+ for _, use := range uses {
if use.IsRealReg() {
r := use.RealReg()
currentUsedSet = currentUsedSet.add(r)
@@ -770,7 +770,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
}
}
- for i, use := range instr.Uses(&a.vs) {
+ for i, use := range uses {
if !use.IsRealReg() {
vs := s.getVRegState(use.ID())
killed := vs.lastUse == pc
@@ -944,8 +944,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) {
s := &a.state
- for i := 0; i < 64; i++ {
- allocated := RealReg(i)
+ for allocated := RealReg(0); allocated < 64; allocated++ {
if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.
continue
}
@@ -974,11 +973,10 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
bID := blk.ID()
blkSt := a.getOrAllocateBlockState(bID)
desiredOccupants := &blkSt.startRegs
- aliveOnRegVRegs := make(map[VReg]RealReg)
- for i := 0; i < 64; i++ {
- r := RealReg(i)
- if v := blkSt.startRegs.get(r); v.Valid() {
- aliveOnRegVRegs[v] = r
+ var desiredOccupantsSet RegSet
+ for i, v := range desiredOccupants {
+ if v != VRegInvalid {
+ desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i))
}
}
@@ -987,56 +985,38 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
}
s.currentBlockID = bID
- a.updateLiveInVRState(a.getOrAllocateBlockState(bID))
+ a.updateLiveInVRState(blkSt)
- currentOccupants := &a.currentOccupants
for i := 0; i < preds; i++ {
- currentOccupants.reset()
if i == blkSt.startFromPredIndex {
continue
}
- currentOccupantsRev := make(map[VReg]RealReg)
pred := blk.Pred(i)
predSt := a.getOrAllocateBlockState(pred.ID())
- for ii := 0; ii < 64; ii++ {
- r := RealReg(ii)
- if v := predSt.endRegs.get(r); v.Valid() {
- if _, ok := aliveOnRegVRegs[v]; !ok {
- continue
- }
- currentOccupants.add(r, v)
- currentOccupantsRev[v] = r
- }
- }
s.resetAt(predSt)
// Finds the free registers if any.
intTmp, floatTmp := VRegInvalid, VRegInvalid
if intFree := s.findAllocatable(
- a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set,
+ a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupantsSet,
); intFree != RealRegInvalid {
intTmp = FromRealReg(intFree, RegTypeInt)
}
if floatFree := s.findAllocatable(
- a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set,
+ a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupantsSet,
); floatFree != RealRegInvalid {
floatTmp = FromRealReg(floatFree, RegTypeFloat)
}
- if wazevoapi.RegAllocLoggingEnabled {
- fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
- }
-
- for ii := 0; ii < 64; ii++ {
- r := RealReg(ii)
+ for r := RealReg(0); r < 64; r++ {
desiredVReg := desiredOccupants.get(r)
if !desiredVReg.Valid() {
continue
}
- currentVReg := currentOccupants.get(r)
+ currentVReg := s.regsInUse.get(r)
if desiredVReg.ID() == currentVReg.ID() {
continue
}
@@ -1048,86 +1028,95 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
} else {
tmpRealReg = floatTmp
}
- a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ)
+ a.reconcileEdge(f, r, pred, currentVReg, desiredVReg, tmpRealReg, typ)
}
}
}
+// reconcileEdge reconciles the register state between the current block and the predecessor for the real register `r`.
+//
+// - currentVReg is the current VReg value that sits on the register `r`. This can be VRegInvalid if the register is not used at the end of the predecessor.
+// - desiredVReg is the desired VReg value that should be on the register `r`.
+// - freeReg is the temporary register that can be used to swap the values, which may or may not be used.
+// - typ is the register type of `r`.
func (a *Allocator) reconcileEdge(f Function,
r RealReg,
pred Block,
- currentOccupants *regInUseSet,
- currentOccupantsRev map[VReg]RealReg,
currentVReg, desiredVReg VReg,
freeReg VReg,
typ RegType,
) {
+ // There are four cases to consider:
+ // 1. currentVReg is valid, but desiredVReg is on the stack.
+ // 2. Both currentVReg and desiredVReg are valid.
+ // 3. desiredVReg is on a different register than `r`, and currentVReg is not valid.
+ // 4. desiredVReg is on the stack, and currentVReg is not valid.
+
s := &a.state
if currentVReg.Valid() {
- // Both are on reg.
- er, ok := currentOccupantsRev[desiredVReg]
- if !ok {
+ desiredState := s.getVRegState(desiredVReg.ID())
+ er := desiredState.r
+ if er == RealRegInvalid {
+ // Case 1: currentVReg is valid, but desiredVReg is on the stack.
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n",
desiredVReg.ID(), a.regInfo.RealRegName(r),
)
}
- // This case is that the desired value is on the stack, but currentVReg is on the target register.
- // We need to move the current value to the stack, and reload the desired value.
+ // We need to move the current value to the stack, and reload the desired value into the register.
// TODO: we can do better here.
f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion())
- delete(currentOccupantsRev, currentVReg)
+ s.releaseRealReg(r)
s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
- currentOccupants.add(r, desiredVReg)
- currentOccupantsRev[desiredVReg] = r
+ s.useRealReg(r, desiredVReg)
return
- }
-
- if wazevoapi.RegAllocLoggingEnabled {
- fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
- desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
+ } else {
+ // Case 2: Both currentVReg and desiredVReg are valid.
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
+ desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
+ )
+ }
+ // In this case, we need to swap the current and desired values.
+ f.SwapBefore(
+ currentVReg.SetRealReg(r),
+ desiredVReg.SetRealReg(er),
+ freeReg,
+ pred.LastInstrForInsertion(),
)
- }
- f.SwapBefore(
- currentVReg.SetRealReg(r),
- desiredVReg.SetRealReg(er),
- freeReg,
- pred.LastInstrForInsertion(),
- )
- s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
- currentOccupantsRev[desiredVReg] = r
- currentOccupantsRev[currentVReg] = er
- currentOccupants.add(r, desiredVReg)
- currentOccupants.add(er, currentVReg)
- if wazevoapi.RegAllocLoggingEnabled {
- fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
+ s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
+ s.releaseRealReg(r)
+ s.releaseRealReg(er)
+ s.useRealReg(r, desiredVReg)
+ s.useRealReg(er, currentVReg)
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
+ }
}
} else {
- // Desired is on reg, but currently the target register is not used.
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("\t\tv%d is desired to be on %s, current not used\n",
desiredVReg.ID(), a.regInfo.RealRegName(r),
)
}
- if currentReg, ok := currentOccupantsRev[desiredVReg]; ok {
+ if currentReg := s.getVRegState(desiredVReg.ID()).r; currentReg != RealRegInvalid {
+ // Case 3: Desired is on a different register than `r` and currentReg is not valid.
+ // We simply need to move the desired value to the register.
f.InsertMoveBefore(
FromRealReg(r, typ),
desiredVReg.SetRealReg(currentReg),
pred.LastInstrForInsertion(),
)
- currentOccupants.remove(currentReg)
+ s.releaseRealReg(currentReg)
} else {
+ // Case 4: currentVReg is not valid, and desiredVReg is on the stack.
+ // We simply need to reload the desired value into the register.
s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
}
- currentOccupantsRev[desiredVReg] = r
- currentOccupants.add(r, desiredVReg)
- }
-
- if wazevoapi.RegAllocLoggingEnabled {
- fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
+ s.useRealReg(r, desiredVReg)
}
}
@@ -1169,8 +1158,7 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) {
}
for pos != definingBlk {
st := a.getOrAllocateBlockState(pos.ID())
- for ii := 0; ii < 64; ii++ {
- rr := RealReg(ii)
+ for rr := RealReg(0); rr < 64; rr++ {
if st.startRegs.get(rr) == v {
r = rr
// Already in the register, so we can place the spill at the beginning of the block.
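
The regalloc.go rewrite drops the per-edge scratch maps (currentOccupants/currentOccupantsRev) and lets reconcileEdge read the allocator's own state to pick one of the four documented cases. A compact sketch of that decision on toy state, which only prints the primitive it would emit rather than mutating anything:

package main

import "fmt"

type vReg int

const vRegInvalid vReg = -1

// edgeState is a toy stand-in for the allocator state the new reconcileEdge
// consults directly (register occupancy and per-vreg register assignments).
type edgeState struct {
	inReg map[int]vReg // real register -> virtual register currently held there
	regOf map[vReg]int // virtual register -> real register, or -1 if spilled
}

// reconcile prints which primitive (store+reload, swap, move, reload) would be
// emitted on the edge so that real register r ends up holding desired.
func (s *edgeState) reconcile(r int, desired vReg) {
	current := s.inReg[r]
	switch {
	case current == desired:
		// Already in place.
	case current != vRegInvalid && s.regOf[desired] == -1:
		fmt.Printf("case 1: store v%d, then reload v%d into r%d\n", current, desired, r)
	case current != vRegInvalid:
		fmt.Printf("case 2: swap r%d (v%d) with r%d (v%d)\n", r, current, s.regOf[desired], desired)
	case s.regOf[desired] != -1:
		fmt.Printf("case 3: move v%d from r%d into r%d\n", desired, s.regOf[desired], r)
	default:
		fmt.Printf("case 4: reload v%d from the stack into r%d\n", desired, r)
	}
}

func main() {
	s := &edgeState{
		inReg: map[int]vReg{0: 7, 1: vRegInvalid},
		regOf: map[vReg]int{7: 0, 9: -1, 11: 2},
	}
	s.reconcile(0, 9)  // desired v9 is spilled while r0 holds v7
	s.reconcile(1, 11) // r1 is free and v11 sits on r2
}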
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
index e9bf60661..04a8e8f4d 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
@@ -46,23 +46,24 @@ func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {
}
}
-type regInUseSet struct {
- set RegSet
- vrs [64]VReg
+type regInUseSet [64]VReg
+
+func newRegInUseSet() regInUseSet {
+ var ret regInUseSet
+ ret.reset()
+ return ret
}
func (rs *regInUseSet) reset() {
- rs.set = 0
- for i := range rs.vrs {
- rs.vrs[i] = VRegInvalid
+ for i := range rs {
+ rs[i] = VRegInvalid
}
}
func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
var ret []string
- for i := 0; i < 64; i++ {
- if rs.set&(1<<uint(i)) != 0 {
- vr := rs.vrs[i]
+ for i, vr := range rs {
+ if vr != VRegInvalid {
ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID()))
}
}
@@ -70,39 +71,28 @@ func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
}
func (rs *regInUseSet) has(r RealReg) bool {
- if r >= 64 {
- return false
- }
- return rs.set&(1<<uint(r)) != 0
+ return r < 64 && rs[r] != VRegInvalid
}
func (rs *regInUseSet) get(r RealReg) VReg {
- if r >= 64 {
- return VRegInvalid
- }
- return rs.vrs[r]
+ return rs[r]
}
func (rs *regInUseSet) remove(r RealReg) {
- if r >= 64 {
- return
- }
- rs.set &= ^(1 << uint(r))
- rs.vrs[r] = VRegInvalid
+ rs[r] = VRegInvalid
}
func (rs *regInUseSet) add(r RealReg, vr VReg) {
if r >= 64 {
return
}
- rs.set |= 1 << uint(r)
- rs.vrs[r] = vr
+ rs[r] = vr
}
func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) {
- for i := 0; i < 64; i++ {
- if rs.set&(1<<uint(i)) != 0 {
- f(RealReg(i), rs.vrs[i])
+ for i, vr := range rs {
+ if vr != VRegInvalid {
+ f(RealReg(i), vr)
}
}
}
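
Finally, regset.go collapses regInUseSet from a bitset plus a [64]VReg array into the array alone, using VRegInvalid as the empty marker. A short sketch of that representation with toy types; the sentinel value and method set here are illustrative only:

package main

import "fmt"

type vReg uint64

// vRegInvalid is a toy sentinel; wazero's real VRegInvalid is defined differently.
const vRegInvalid vReg = 0

// regInUseSet mirrors the new array-backed layout: one slot per real register,
// with the sentinel marking "free" instead of a companion bitmask.
type regInUseSet [64]vReg

func (rs *regInUseSet) reset() {
	for i := range rs {
		rs[i] = vRegInvalid
	}
}

func (rs *regInUseSet) has(r int) bool { return r < 64 && rs[r] != vRegInvalid }

func (rs *regInUseSet) add(r int, v vReg) {
	if r < 64 {
		rs[r] = v
	}
}

func (rs *regInUseSet) remove(r int) { rs[r] = vRegInvalid }

func main() {
	var rs regInUseSet
	rs.reset()
	rs.add(3, 42)
	rs.add(9, 7)
	rs.remove(9)
	for i, v := range rs { // membership is just a sentinel comparison per slot
		if v != vRegInvalid {
			fmt.Printf("x%d -> v%d\n", i, v)
		}
	}
	fmt.Println(rs.has(3), rs.has(4)) // true false
}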