-rw-r--r--  go.mod | 4
-rw-r--r--  go.sum | 8
-rw-r--r--  vendor/github.com/ncruces/go-sqlite3/internal/util/json.go | 2
-rw-r--r--  vendor/github.com/ncruces/go-sqlite3/json.go | 3
-rw-r--r--  vendor/github.com/ncruces/go-sqlite3/pointer.go | 3
-rw-r--r--  vendor/github.com/ncruces/go-sqlite3/stmt.go | 2
-rw-r--r--  vendor/github.com/ncruces/go-sqlite3/value.go | 2
-rw-r--r--  vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go | 29
-rw-r--r--  vendor/github.com/ncruces/go-sqlite3/vfs/shm.go | 3
-rw-r--r--  vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go | 16
-rw-r--r--  vendor/github.com/tetratelabs/wazero/config.go | 19
-rw-r--r--  vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go | 13
-rw-r--r--  vendor/github.com/tetratelabs/wazero/experimental/listener.go | 6
-rw-r--r--  vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go | 6
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go | 3
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go | 28
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go | 16
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go | 30
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go | 29
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go | 119
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go | 438
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go | 136
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go | 6
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go | 379
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go | 59
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go | 17
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go | 51
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go | 10
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go | 12
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go | 140
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go | 44
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go | 18
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go | 7
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go | 21
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go | 12
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go | 31
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go | 73
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go | 104
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go | 41
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go | 16
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go | 49
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go | 3
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go | 4
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go | 5
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go | 36
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go | 9
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go | 2
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go | 2
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go | 2
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go | 3
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go | 3
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go | 7
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go | 23
-rw-r--r--  vendor/github.com/tetratelabs/wazero/internal/wasm/module.go | 7
-rw-r--r--  vendor/modules.txt | 4
55 files changed, 1075 insertions(+), 1040 deletions(-)
diff --git a/go.mod b/go.mod
index 1fe0bf5d2..149a88117 100644
--- a/go.mod
+++ b/go.mod
@@ -44,7 +44,7 @@ require (
github.com/miekg/dns v1.1.59
github.com/minio/minio-go/v7 v7.0.71
github.com/mitchellh/mapstructure v1.5.0
- github.com/ncruces/go-sqlite3 v0.16.1
+ github.com/ncruces/go-sqlite3 v0.16.2
github.com/oklog/ulid v1.3.1
github.com/prometheus/client_golang v1.19.1
github.com/spf13/cobra v1.8.0
@@ -199,7 +199,7 @@ require (
github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe // indirect
github.com/superseriousbusiness/go-png-image-structure/v2 v2.0.1-SSB // indirect
github.com/tdewolff/parse/v2 v2.7.14 // indirect
- github.com/tetratelabs/wazero v1.7.2 // indirect
+ github.com/tetratelabs/wazero v1.7.3 // indirect
github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc // indirect
github.com/toqueteos/webbrowser v1.2.0 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
diff --git a/go.sum b/go.sum
index 81d950a8b..df382c545 100644
--- a/go.sum
+++ b/go.sum
@@ -445,8 +445,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/moul/http2curl v1.0.0 h1:dRMWoAtb+ePxMlLkrCbAqh4TlPHXvoGUSQ323/9Zahs=
github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ=
-github.com/ncruces/go-sqlite3 v0.16.1 h1:1wHv7s8y+fWK44UIliotJ42ZV41A5T0sjIAqGmnMrkc=
-github.com/ncruces/go-sqlite3 v0.16.1/go.mod h1:feFXbBcbLtxNk6XWG1ROt8MS9+E45yCW3G8o4ixIqZ8=
+github.com/ncruces/go-sqlite3 v0.16.2 h1:HesVRr0BC6QSGSEQfEXOntFWS9wd4Z8ms4nJzfUv4Rg=
+github.com/ncruces/go-sqlite3 v0.16.2/go.mod h1:wkUIvOrAjFQnefVlivJfcowKUcfMHs4mvLfhVanzHHI=
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M=
@@ -562,8 +562,8 @@ github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739 h1:IkjBCtQOOjIn03
github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739/go.mod h1:XPuWBzvdUzhCuxWO1ojpXsyzsA5bFoS3tO/Q3kFuTG8=
github.com/technologize/otel-go-contrib v1.1.1 h1:wZH9aSPNWZWIkEh3vfaKfMb15AJ80jJ1aVj/4GZdqIw=
github.com/technologize/otel-go-contrib v1.1.1/go.mod h1:dCN/wj2WyUO8aFZFdIN+6tfJHImjTML/8r2YVYAy3So=
-github.com/tetratelabs/wazero v1.7.2 h1:1+z5nXJNwMLPAWaTePFi49SSTL0IMx/i3Fg8Yc25GDc=
-github.com/tetratelabs/wazero v1.7.2/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y=
+github.com/tetratelabs/wazero v1.7.3 h1:PBH5KVahrt3S2AHgEjKu4u+LlDbbk+nsGE3KLucy6Rw=
+github.com/tetratelabs/wazero v1.7.3/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y=
github.com/tidwall/btree v0.0.0-20191029221954-400434d76274 h1:G6Z6HvJuPjG6XfNGi/feOATzeJrfgTNJY+rGrHbA04E=
github.com/tidwall/btree v0.0.0-20191029221954-400434d76274/go.mod h1:huei1BkDWJ3/sLXmO+bsCNELL+Bp2Kks9OLyQFkzvA8=
github.com/tidwall/buntdb v1.1.2 h1:noCrqQXL9EKMtcdwJcmuVKSEjqu1ua99RHHgbLTEHRo=
diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go
index c0ba38cf0..7f6849a42 100644
--- a/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go
+++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go
@@ -26,7 +26,7 @@ func (j JSON) Scan(value any) error {
buf = v.AppendFormat(buf, time.RFC3339Nano)
buf = append(buf, '"')
case nil:
- buf = append(buf, "null"...)
+ buf = []byte("null")
default:
panic(AssertErr())
}
diff --git a/vendor/github.com/ncruces/go-sqlite3/json.go b/vendor/github.com/ncruces/go-sqlite3/json.go
index 9b2565e87..2b762c092 100644
--- a/vendor/github.com/ncruces/go-sqlite3/json.go
+++ b/vendor/github.com/ncruces/go-sqlite3/json.go
@@ -5,7 +5,8 @@ import "github.com/ncruces/go-sqlite3/internal/util"
// JSON returns a value that can be used as an argument to
// [database/sql.DB.Exec], [database/sql.Row.Scan] and similar methods to
// store value as JSON, or decode JSON into value.
-// JSON should NOT be used with [BindJSON] or [ResultJSON].
+// JSON should NOT be used with [Stmt.BindJSON], [Stmt.ColumnJSON],
+// [Value.JSON], or [Context.ResultJSON].
func JSON(value any) any {
return util.JSON{Value: value}
}
diff --git a/vendor/github.com/ncruces/go-sqlite3/pointer.go b/vendor/github.com/ncruces/go-sqlite3/pointer.go
index 611c1528c..0e2418b99 100644
--- a/vendor/github.com/ncruces/go-sqlite3/pointer.go
+++ b/vendor/github.com/ncruces/go-sqlite3/pointer.go
@@ -4,7 +4,8 @@ import "github.com/ncruces/go-sqlite3/internal/util"
// Pointer returns a pointer to a value that can be used as an argument to
// [database/sql.DB.Exec] and similar methods.
-// Pointer should NOT be used with [BindPointer] or [ResultPointer].
+// Pointer should NOT be used with [Stmt.BindPointer],
+// [Value.Pointer], or [Context.ResultPointer].
//
// https://sqlite.org/bindptr.html
func Pointer[T any](value T) any {
diff --git a/vendor/github.com/ncruces/go-sqlite3/stmt.go b/vendor/github.com/ncruces/go-sqlite3/stmt.go
index ac40e3802..381a7d06b 100644
--- a/vendor/github.com/ncruces/go-sqlite3/stmt.go
+++ b/vendor/github.com/ncruces/go-sqlite3/stmt.go
@@ -564,7 +564,7 @@ func (s *Stmt) ColumnJSON(col int, ptr any) error {
var data []byte
switch s.ColumnType(col) {
case NULL:
- data = append(data, "null"...)
+ data = []byte("null")
case TEXT:
data = s.ColumnRawText(col)
case BLOB:
diff --git a/vendor/github.com/ncruces/go-sqlite3/value.go b/vendor/github.com/ncruces/go-sqlite3/value.go
index d0edf215b..1894ff4f1 100644
--- a/vendor/github.com/ncruces/go-sqlite3/value.go
+++ b/vendor/github.com/ncruces/go-sqlite3/value.go
@@ -177,7 +177,7 @@ func (v Value) JSON(ptr any) error {
var data []byte
switch v.Type() {
case NULL:
- data = append(data, "null"...)
+ data = []byte("null")
case TEXT:
data = v.RawText()
case BLOB:
diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go
index 8dc57ab9c..f21335d8e 100644
--- a/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go
+++ b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go
@@ -75,11 +75,6 @@ func (memVFS) FullPathname(name string) (string, error) {
type memDB struct {
name string
- // +checklocks:lockMtx
- pending *memFile
- // +checklocks:lockMtx
- reserved *memFile
-
// +checklocks:dataMtx
data []*[sectorSize]byte
@@ -88,6 +83,10 @@ type memDB struct {
// +checklocks:lockMtx
shared int
+ // +checklocks:lockMtx
+ reserved bool
+ // +checklocks:lockMtx
+ pending bool
// +checklocks:memoryMtx
refs int
@@ -214,24 +213,24 @@ func (m *memFile) Lock(lock vfs.LockLevel) error {
switch lock {
case vfs.LOCK_SHARED:
- if m.pending != nil {
+ if m.pending {
return sqlite3.BUSY
}
m.shared++
case vfs.LOCK_RESERVED:
- if m.reserved != nil {
+ if m.reserved {
return sqlite3.BUSY
}
- m.reserved = m
+ m.reserved = true
case vfs.LOCK_EXCLUSIVE:
if m.lock < vfs.LOCK_PENDING {
- if m.pending != nil {
+ if m.pending {
return sqlite3.BUSY
}
m.lock = vfs.LOCK_PENDING
- m.pending = m
+ m.pending = true
}
for before := time.Now(); m.shared > 1; {
@@ -256,11 +255,11 @@ func (m *memFile) Unlock(lock vfs.LockLevel) error {
m.lockMtx.Lock()
defer m.lockMtx.Unlock()
- if m.pending == m {
- m.pending = nil
+ if m.pending && m.lock >= vfs.LOCK_PENDING {
+ m.pending = false
}
- if m.reserved == m {
- m.reserved = nil
+ if m.reserved && m.lock >= vfs.LOCK_RESERVED {
+ m.reserved = false
}
if lock < vfs.LOCK_SHARED {
m.shared--
@@ -275,7 +274,7 @@ func (m *memFile) CheckReservedLock() (bool, error) {
}
m.lockMtx.Lock()
defer m.lockMtx.Unlock()
- return m.reserved != nil, nil
+ return m.reserved, nil
}
func (m *memFile) SectorSize() int {
diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go b/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go
index 58da34df4..7b0d4b677 100644
--- a/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go
+++ b/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go
@@ -125,6 +125,9 @@ func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, ext
return 0, _IOERR_SHMMAP
}
s.regions = append(s.regions, r)
+ if s.readOnly {
+ return r.Ptr, _READONLY
+ }
return r.Ptr, _OK
}
diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go
index 3b45b3087..8c2abee81 100644
--- a/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go
+++ b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go
@@ -101,13 +101,13 @@ func (s *vfsShm) shmOpen() (rc _ErrorCode) {
return _OK
}
- // Open file read-write, as it will be shared.
+ // Always open file read-write, as it will be shared.
f, err := os.OpenFile(s.path,
unix.O_RDWR|unix.O_CREAT|unix.O_NOFOLLOW, 0666)
if err != nil {
return _CANTOPEN
}
- // Close if file if it's not nil.
+ // Closes file if it's not nil.
defer func() { f.Close() }()
fi, err := f.Stat()
@@ -145,17 +145,14 @@ func (s *vfsShm) shmOpen() (rc _ErrorCode) {
info: fi,
refs: 1,
}
- f = nil
- add := true
+ f = nil // Don't close the file.
for i, g := range vfsShmFiles {
if g == nil {
vfsShmFiles[i] = s.vfsShmFile
- add = false
+ return rc
}
}
- if add {
- vfsShmFiles = append(vfsShmFiles, s.vfsShmFile)
- }
+ vfsShmFiles = append(vfsShmFiles, s.vfsShmFile)
return rc
}
@@ -195,6 +192,9 @@ func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, ext
return 0, _IOERR_SHMMAP
}
s.regions = append(s.regions, r)
+ if s.readOnly {
+ return r.Ptr, _READONLY
+ }
return r.Ptr, _OK
}
diff --git a/vendor/github.com/tetratelabs/wazero/config.go b/vendor/github.com/tetratelabs/wazero/config.go
index 819a76df5..d3656849c 100644
--- a/vendor/github.com/tetratelabs/wazero/config.go
+++ b/vendor/github.com/tetratelabs/wazero/config.go
@@ -148,7 +148,7 @@ type RuntimeConfig interface {
// customSections := c.CustomSections()
WithCustomSections(bool) RuntimeConfig
- // WithCloseOnContextDone ensures the executions of functions to be closed under one of the following circumstances:
+ // WithCloseOnContextDone ensures the executions of functions to be terminated under one of the following circumstances:
//
// - context.Context passed to the Call method of api.Function is canceled during execution. (i.e. ctx by context.WithCancel)
// - context.Context passed to the Call method of api.Function reaches timeout during execution. (i.e. ctx by context.WithTimeout or context.WithDeadline)
@@ -159,6 +159,8 @@ type RuntimeConfig interface {
// entire underlying OS thread which runs the api.Function call. See "Why it's safe to execute runtime-generated
// machine codes against async Goroutine preemption" section in RATIONALE.md for detail.
//
+ // Upon the termination of the function executions, api.Module is closed.
+ //
// Note that this comes with a bit of extra cost when enabled. The reason is that internally this forces
// interpreter and compiler runtimes to insert the periodical checks on the conditions above. For that reason,
// this is disabled by default.
@@ -217,9 +219,18 @@ const (
// part. wazero automatically performs ahead-of-time compilation as needed when
// Runtime.CompileModule is invoked.
//
-// Warning: This panics at runtime if the runtime.GOOS or runtime.GOARCH does not
-// support compiler. Use NewRuntimeConfig to safely detect and fallback to
-// NewRuntimeConfigInterpreter if needed.
+// # Warning
+//
+// - This panics at runtime if the runtime.GOOS or runtime.GOARCH does not
+// support compiler. Use NewRuntimeConfig to safely detect and fallback to
+// NewRuntimeConfigInterpreter if needed.
+//
+// - If you are using wazero in buildmode=c-archive or c-shared, make sure that you set up the alternate signal stack
+// by using, e.g. `sigaltstack` combined with `SA_ONSTACK` flag on `sigaction` on Linux,
+// before calling any api.Function. This is because the Go runtime does not set up the alternate signal stack
+// for c-archive or c-shared modes, and wazero uses the different stack than the calling Goroutine.
+// Hence, the signal handler might get invoked on the wazero's stack, which may cause a stack overflow.
+// https://github.com/tetratelabs/wazero/blob/2092c0a879f30d49d7b37f333f4547574b8afe0d/internal/integration_test/fuzz/fuzz/tests/sigstack.rs#L19-L36
func NewRuntimeConfigCompiler() RuntimeConfig {
ret := engineLessConfig.clone()
ret.engineKind = engineKindCompiler
diff --git a/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go
index 443c5a294..c75db615e 100644
--- a/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go
+++ b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go
@@ -21,13 +21,6 @@ type Snapshotter interface {
Snapshot() Snapshot
}
-// EnableSnapshotterKey is a context key to indicate that snapshotting should be enabled.
-// The context.Context passed to a exported function invocation should have this key set
-// to a non-nil value, and host functions will be able to retrieve it using SnapshotterKey.
-//
-// Deprecated: use WithSnapshotter to enable snapshots.
-type EnableSnapshotterKey = expctxkeys.EnableSnapshotterKey
-
// WithSnapshotter enables snapshots.
// Passing the returned context to a exported function invocation enables snapshots,
// and allows host functions to retrieve the Snapshotter using GetSnapshotter.
@@ -35,12 +28,6 @@ func WithSnapshotter(ctx context.Context) context.Context {
return context.WithValue(ctx, expctxkeys.EnableSnapshotterKey{}, struct{}{})
}
-// SnapshotterKey is a context key to access a Snapshotter from a host function.
-// It is only present if EnableSnapshotter was set in the function invocation context.
-//
-// Deprecated: use GetSnapshotter to get the snapshotter.
-type SnapshotterKey = expctxkeys.SnapshotterKey
-
// GetSnapshotter gets the Snapshotter from a host function.
// It is only present if WithSnapshotter was called with the function invocation context.
func GetSnapshotter(ctx context.Context) Snapshotter {
diff --git a/vendor/github.com/tetratelabs/wazero/experimental/listener.go b/vendor/github.com/tetratelabs/wazero/experimental/listener.go
index b2ba1fe83..55fc6b668 100644
--- a/vendor/github.com/tetratelabs/wazero/experimental/listener.go
+++ b/vendor/github.com/tetratelabs/wazero/experimental/listener.go
@@ -24,12 +24,6 @@ type StackIterator interface {
ProgramCounter() ProgramCounter
}
-// FunctionListenerFactoryKey is a context.Context Value key.
-// Its associated value should be a FunctionListenerFactory.
-//
-// Deprecated: use WithFunctionListenerFactory to enable snapshots.
-type FunctionListenerFactoryKey = expctxkeys.FunctionListenerFactoryKey
-
// WithFunctionListenerFactory registers a FunctionListenerFactory
// with the context.
func WithFunctionListenerFactory(ctx context.Context, factory FunctionListenerFactory) context.Context {
diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go
index 761a1f9dc..5ebc1780f 100644
--- a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go
+++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go
@@ -23,6 +23,10 @@ const (
// instead of syscall.ENOTDIR
_ERROR_DIRECTORY = syscall.Errno(0x10B)
+ // _ERROR_NOT_A_REPARSE_POINT is a Windows error returned by os.Readlink
+ // instead of syscall.EINVAL
+ _ERROR_NOT_A_REPARSE_POINT = syscall.Errno(0x1126)
+
// _ERROR_INVALID_SOCKET is a Windows error returned by winsock_select
// when a given handle is not a socket.
_ERROR_INVALID_SOCKET = syscall.Errno(0x2736)
@@ -51,7 +55,7 @@ func errorToErrno(err error) Errno {
return EBADF
case syscall.ERROR_PRIVILEGE_NOT_HELD:
return EPERM
- case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME:
+ case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME, _ERROR_NOT_A_REPARSE_POINT:
return EINVAL
}
errno, _ := syscallToErrno(err)
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go
index a89ddc457..18c5f4252 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go
@@ -98,6 +98,9 @@ func (e *moduleEngine) SetGlobalValue(idx wasm.Index, lo, hi uint64) {
// OwnsGlobals implements the same method as documented on wasm.ModuleEngine.
func (e *moduleEngine) OwnsGlobals() bool { return false }
+// MemoryGrown implements wasm.ModuleEngine.
+func (e *moduleEngine) MemoryGrown() {}
+
// callEngine holds context per moduleEngine.Call, and shared across all the
// function calls originating from the same moduleEngine.Call execution.
//
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go
index 81c6a6b62..8e9571b20 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go
@@ -43,7 +43,7 @@ type ExecutableContextT[Instr any] struct {
labelPositionPool wazevoapi.Pool[LabelPosition[Instr]]
NextLabel Label
// LabelPositions maps a label to the instructions of the region which the label represents.
- LabelPositions map[Label]*LabelPosition[Instr]
+ LabelPositions []*LabelPosition[Instr]
OrderedBlockLabels []*LabelPosition[Instr]
// PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock.
@@ -67,7 +67,6 @@ func NewExecutableContextT[Instr any](
setNext: setNext,
setPrev: setPrev,
labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]),
- LabelPositions: make(map[Label]*LabelPosition[Instr]),
NextLabel: LabelInvalid,
}
}
@@ -97,11 +96,7 @@ func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) {
end := e.allocateNop0()
e.PerBlockHead, e.PerBlockEnd = end, end
- labelPos, ok := e.LabelPositions[l]
- if !ok {
- labelPos = e.AllocateLabelPosition(l)
- e.LabelPositions[l] = labelPos
- }
+ labelPos := e.GetOrAllocateLabelPosition(l)
e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos)
labelPos.Begin, labelPos.End = end, end
labelPos.SB = blk
@@ -146,8 +141,8 @@ func (e *ExecutableContextT[T]) FlushPendingInstructions() {
func (e *ExecutableContextT[T]) Reset() {
e.labelPositionPool.Reset()
e.InstructionPool.Reset()
- for l := Label(0); l <= e.NextLabel; l++ {
- delete(e.LabelPositions, l)
+ for i := range e.LabelPositions {
+ e.LabelPositions[i] = nil
}
e.PendingInstructions = e.PendingInstructions[:0]
e.OrderedBlockLabels = e.OrderedBlockLabels[:0]
@@ -163,10 +158,17 @@ func (e *ExecutableContextT[T]) AllocateLabel() Label {
return e.NextLabel
}
-func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] {
- l := e.labelPositionPool.Allocate()
- l.L = la
- return l
+func (e *ExecutableContextT[T]) GetOrAllocateLabelPosition(l Label) *LabelPosition[T] {
+ if len(e.LabelPositions) <= int(l) {
+ e.LabelPositions = append(e.LabelPositions, make([]*LabelPosition[T], int(l)+1-len(e.LabelPositions))...)
+ }
+ ret := e.LabelPositions[l]
+ if ret == nil {
+ ret = e.labelPositionPool.Allocate()
+ ret.L = l
+ e.LabelPositions[l] = ret
+ }
+ return ret
}
func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label {
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
index 310ad2203..61ae6f406 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go
@@ -1906,8 +1906,10 @@ func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) {
func (m *machine) Format() string {
ectx := m.ectx
begins := map[*instruction]backend.Label{}
- for l, pos := range ectx.LabelPositions {
- begins[pos.Begin] = l
+ for _, pos := range ectx.LabelPositions {
+ if pos != nil {
+ begins[pos.Begin] = pos.L
+ }
}
irBlocks := map[backend.Label]ssa.BasicBlockID{}
@@ -1950,7 +1952,10 @@ func (m *machine) encodeWithoutSSA(root *instruction) {
offset := int64(len(*bufPtr))
if cur.kind == nop0 {
l := cur.nop0Label()
- if pos, ok := ectx.LabelPositions[l]; ok {
+ if int(l) >= len(ectx.LabelPositions) {
+ continue
+ }
+ if pos := ectx.LabelPositions[l]; pos != nil {
pos.BinaryOffset = offset
}
}
@@ -2005,7 +2010,7 @@ func (m *machine) Encode(ctx context.Context) (err error) {
switch cur.kind {
case nop0:
l := cur.nop0Label()
- if pos, ok := ectx.LabelPositions[l]; ok {
+ if pos := ectx.LabelPositions[l]; pos != nil {
pos.BinaryOffset = offset
}
case sourceOffsetInfo:
@@ -2165,8 +2170,7 @@ func (m *machine) allocateBrTarget() (nop *instruction, l backend.Label) { //nol
func (m *machine) allocateLabel() *labelPosition {
ectx := m.ectx
l := ectx.AllocateLabel()
- pos := ectx.AllocateLabelPosition(l)
- ectx.LabelPositions[l] = pos
+ pos := ectx.GetOrAllocateLabelPosition(l)
return pos
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
index 6615471c6..4eaa13ce1 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go
@@ -101,13 +101,14 @@ func (m *machine) LowerParams(args []ssa.Value) {
bits := arg.Type.Bits()
// At this point of compilation, we don't yet know how much space exist below the return address.
// So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation.
- amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}
load := m.allocateInstr()
switch arg.Type {
case ssa.TypeI32, ssa.TypeI64:
- load.asULoad(operandNR(reg), amode, bits)
+ load.asULoad(reg, amode, bits)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
- load.asFpuLoad(operandNR(reg), amode, bits)
+ load.asFpuLoad(reg, amode, bits)
default:
panic("BUG")
}
@@ -169,7 +170,8 @@ func (m *machine) LowerReturns(rets []ssa.Value) {
// At this point of compilation, we don't yet know how much space exist below the return address.
// So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation.
- amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}
store := m.allocateInstr()
store.asStore(operandNR(reg), amode, bits)
m.insert(store)
@@ -215,9 +217,9 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i
ldr := m.allocateInstr()
switch r.Type {
case ssa.TypeI32, ssa.TypeI64:
- ldr.asULoad(operandNR(reg), amode, r.Type.Bits())
+ ldr.asULoad(reg, amode, r.Type.Bits())
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
- ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits())
+ ldr.asFpuLoad(reg, amode, r.Type.Bits())
default:
panic("BUG")
}
@@ -225,7 +227,7 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i
}
}
-func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) {
+func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) {
exct := m.executableContext
exct.PendingInstructions = exct.PendingInstructions[:0]
mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse)
@@ -235,15 +237,15 @@ func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset
return cur, mode
}
-func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode {
+func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) *addressMode {
if rn.RegType() != regalloc.RegTypeInt {
panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))
}
- var amode addressMode
+ amode := m.amodePool.Allocate()
if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) {
- amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}
} else if offsetFitsInAddressModeKindRegSignedImm9(offset) {
- amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
+ *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}
} else {
var indexReg regalloc.VReg
if allowTmpRegUse {
@@ -253,7 +255,7 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg
indexReg = m.compiler.AllocateVReg(ssa.TypeI64)
m.lowerConstantI64(indexReg, offset)
}
- amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
+ *amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}
}
return amode
}
@@ -315,7 +317,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b
} else {
ao = aluOpSub
}
- alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true)
+ alu.asALU(ao, rd, operandNR(spVReg), imm12Operand, true)
m.insert(alu)
} else {
m.lowerConstantI64(tmpRegVReg, diff)
@@ -326,7 +328,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b
} else {
ao = aluOpSub
}
- alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true)
+ alu.asALU(ao, rd, operandNR(spVReg), operandNR(tmpRegVReg), true)
m.insert(alu)
}
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go
index 7a9cceb33..f8b5d97ac 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go
@@ -59,25 +59,26 @@ func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regallo
} else {
postIndexImm = 8
}
- loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm}
+ loadMode := m.amodePool.Allocate()
+ *loadMode = addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm}
instr := m.allocateInstr()
switch typ {
case ssa.TypeI32:
- instr.asULoad(loadTargetReg, loadMode, 32)
+ instr.asULoad(loadTargetReg.reg(), loadMode, 32)
case ssa.TypeI64:
- instr.asULoad(loadTargetReg, loadMode, 64)
+ instr.asULoad(loadTargetReg.reg(), loadMode, 64)
case ssa.TypeF32:
- instr.asFpuLoad(loadTargetReg, loadMode, 32)
+ instr.asFpuLoad(loadTargetReg.reg(), loadMode, 32)
case ssa.TypeF64:
- instr.asFpuLoad(loadTargetReg, loadMode, 64)
+ instr.asFpuLoad(loadTargetReg.reg(), loadMode, 64)
case ssa.TypeV128:
- instr.asFpuLoad(loadTargetReg, loadMode, 128)
+ instr.asFpuLoad(loadTargetReg.reg(), loadMode, 128)
}
cur = linkInstr(cur, instr)
if isStackArg {
- var storeMode addressMode
+ var storeMode *addressMode
cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true)
toStack := m.allocateInstr()
toStack.asStore(loadTargetReg, storeMode, bits)
@@ -113,21 +114,22 @@ func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr reg
}
if isStackArg {
- var loadMode addressMode
+ var loadMode *addressMode
cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true)
toReg := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
- toReg.asULoad(storeTargetReg, loadMode, bits)
+ toReg.asULoad(storeTargetReg.reg(), loadMode, bits)
case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128:
- toReg.asFpuLoad(storeTargetReg, loadMode, bits)
+ toReg.asFpuLoad(storeTargetReg.reg(), loadMode, bits)
default:
panic("TODO?")
}
cur = linkInstr(cur, toReg)
}
- mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm}
instr := m.allocateInstr()
instr.asStore(storeTargetReg, mode, bits)
cur = linkInstr(cur, instr)
@@ -214,11 +216,12 @@ func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction
func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction {
instr := m.allocateInstr()
- mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()}
if store {
instr.asStore(operandNR(d), mode, 64)
} else {
- instr.asULoad(operandNR(d), mode, 64)
+ instr.asULoad(d, mode, 64)
}
return linkInstr(prev, instr)
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go
index 466b1f960..99e6bb482 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go
@@ -87,7 +87,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
// Module context is always the second argument.
moduleCtrPtr := x1VReg
store := m.allocateInstr()
- amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}
store.asStore(operandNR(moduleCtrPtr), amode, 64)
cur = linkInstr(cur, store)
}
@@ -120,11 +121,9 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
} else {
sizeInBits = 64
}
- store.asStore(operandNR(v),
- addressMode{
- kind: addressModeKindPostIndex,
- rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8),
- }, sizeInBits)
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8)}
+ store.asStore(operandNR(v), amode, sizeInBits)
cur = linkInstr(cur, store)
}
@@ -139,7 +138,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
frameSizeReg = xzrVReg
sliceSizeReg = xzrVReg
}
- _amode := addressModePreOrPostIndex(spVReg, -16, true)
+ _amode := addressModePreOrPostIndex(m, spVReg, -16, true)
storeP := m.allocateInstr()
storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode)
cur = linkInstr(cur, storeP)
@@ -165,8 +164,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true)
ldr := m.allocateInstr()
// And load the return address.
- ldr.asULoad(operandNR(lrVReg),
- addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
+ amode := addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */)
+ ldr.asULoad(lrVReg, amode, 64)
cur = linkInstr(cur, ldr)
originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want.
@@ -183,23 +182,24 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
r := &abi.Rets[i]
if r.Kind == backend.ABIArgKindReg {
loadIntoReg := m.allocateInstr()
- mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
switch r.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoReg.asULoad(operandNR(r.Reg), mode, 32)
+ loadIntoReg.asULoad(r.Reg, mode, 32)
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoReg.asULoad(operandNR(r.Reg), mode, 64)
+ loadIntoReg.asULoad(r.Reg, mode, 64)
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32)
+ loadIntoReg.asFpuLoad(r.Reg, mode, 32)
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64)
+ loadIntoReg.asFpuLoad(r.Reg, mode, 64)
case ssa.TypeV128:
mode.imm = 16
- loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128)
+ loadIntoReg.asFpuLoad(r.Reg, mode, 128)
default:
panic("TODO")
}
@@ -208,28 +208,29 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *
// First we need to load the value to a temporary just like ^^.
intTmp, floatTmp := x11VReg, v11VReg
loadIntoTmpReg := m.allocateInstr()
- mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}
var resultReg regalloc.VReg
switch r.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32)
+ loadIntoTmpReg.asULoad(intTmp, mode, 32)
resultReg = intTmp
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64)
+ loadIntoTmpReg.asULoad(intTmp, mode, 64)
resultReg = intTmp
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32)
+ loadIntoTmpReg.asFpuLoad(floatTmp, mode, 32)
resultReg = floatTmp
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64)
+ loadIntoTmpReg.asFpuLoad(floatTmp, mode, 64)
resultReg = floatTmp
case ssa.TypeV128:
mode.imm = 16
- loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128)
+ loadIntoTmpReg.asFpuLoad(floatTmp, mode, 128)
resultReg = floatTmp
default:
panic("TODO")
@@ -258,12 +259,13 @@ func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regal
case regalloc.RegTypeFloat:
sizeInBits = 128
}
- store.asStore(operandNR(v),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- // Execution context is always the first argument.
- rn: x0VReg, imm: offset,
- }, sizeInBits)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ // Execution context is always the first argument.
+ rn: x0VReg, imm: offset,
+ }
+ store.asStore(operandNR(v), mode, sizeInBits)
store.prev = cur
cur.next = store
cur = store
@@ -276,7 +278,7 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re
offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()
for _, v := range regs {
load := m.allocateInstr()
- var as func(dst operand, amode addressMode, sizeInBits byte)
+ var as func(dst regalloc.VReg, amode *addressMode, sizeInBits byte)
var sizeInBits byte
switch v.RegType() {
case regalloc.RegTypeInt:
@@ -286,12 +288,13 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re
as = load.asFpuLoad
sizeInBits = 128
}
- as(operandNR(v),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- // Execution context is always the first argument.
- rn: x0VReg, imm: offset,
- }, sizeInBits)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ // Execution context is always the first argument.
+ rn: x0VReg, imm: offset,
+ }
+ as(v, mode, sizeInBits)
cur = linkInstr(cur, load)
offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16.
}
@@ -324,11 +327,9 @@ func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode
// Set the exit status on the execution context.
setExistStatus := m.allocateInstr()
- setExistStatus.asStore(operandNR(constReg),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
- }, 32)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64()}
+ setExistStatus.asStore(operandNR(constReg), mode, 32)
cur = linkInstr(cur, setExistStatus)
return cur
}
@@ -340,12 +341,13 @@ func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction {
cur = linkInstr(cur, adr)
storeReturnAddr := m.allocateInstr()
- storeReturnAddr.asStore(operandNR(tmpRegVReg),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- // Execution context is always the first argument.
- rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
- }, 64)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ // Execution context is always the first argument.
+ rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
+ }
+ storeReturnAddr.asStore(operandNR(tmpRegVReg), mode, 64)
cur = linkInstr(cur, storeReturnAddr)
// Exit the execution.
@@ -364,11 +366,12 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe
cur = linkInstr(cur, movSp)
strSp := m.allocateInstr()
- strSp.asStore(operandNR(tmpRegVReg),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
- }, 64)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
+ }
+ strSp.asStore(operandNR(tmpRegVReg), mode, 64)
cur = linkInstr(cur, strSp)
return cur
}
@@ -376,27 +379,28 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe
func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) {
load := m.allocateInstr()
var result regalloc.VReg
- mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}
switch arg.Type {
case ssa.TypeI32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- load.asULoad(operandNR(intVReg), mode, 32)
+ load.asULoad(intVReg, mode, 32)
result = intVReg
case ssa.TypeI64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- load.asULoad(operandNR(intVReg), mode, 64)
+ load.asULoad(intVReg, mode, 64)
result = intVReg
case ssa.TypeF32:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- load.asFpuLoad(operandNR(floatVReg), mode, 32)
+ load.asFpuLoad(floatVReg, mode, 32)
result = floatVReg
case ssa.TypeF64:
mode.imm = 8 // We use uint64 for all basic types, except SIMD v128.
- load.asFpuLoad(operandNR(floatVReg), mode, 64)
+ load.asFpuLoad(floatVReg, mode, 64)
result = floatVReg
case ssa.TypeV128:
mode.imm = 16
- load.asFpuLoad(operandNR(floatVReg), mode, 128)
+ load.asFpuLoad(floatVReg, mode, 128)
result = floatVReg
default:
panic("TODO")
@@ -408,7 +412,8 @@ func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg r
func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction {
store := m.allocateInstr()
- mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}
var sizeInBits byte
switch result.Type {
case ssa.TypeI32, ssa.TypeF32:
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
index 8aabc5997..7121cb538 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go
@@ -3,10 +3,12 @@ package arm64
import (
"fmt"
"math"
+ "unsafe"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"
"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"
"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa"
+ "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"
)
type (
@@ -22,9 +24,9 @@ type (
// TODO: optimize the layout later once the impl settles.
instruction struct {
prev, next *instruction
- u1, u2, u3 uint64
- rd, rm, rn, ra operand
- amode addressMode
+ u1, u2 uint64
+ rd regalloc.VReg
+ rm, rn operand
kind instructionKind
addedBeforeRegAlloc bool
}
@@ -174,7 +176,7 @@ func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg {
switch defKinds[i.kind] {
case defKindNone:
case defKindRD:
- *regs = append(*regs, i.rd.nr())
+ *regs = append(*regs, i.rd)
case defKindCall:
_, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2)
for i := byte(0); i < retIntRealRegs; i++ {
@@ -194,7 +196,7 @@ func (i *instruction) AssignDef(reg regalloc.VReg) {
switch defKinds[i.kind] {
case defKindNone:
case defKindRD:
- i.rd = i.rd.assignReg(reg)
+ i.rd = reg
case defKindCall:
panic("BUG: call instructions shouldn't be assigned")
default:
@@ -329,7 +331,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
if rm := i.rm.reg(); rm.Valid() {
*regs = append(*regs, rm)
}
- if ra := i.ra.reg(); ra.Valid() {
+ if ra := regalloc.VReg(i.u2); ra.Valid() {
*regs = append(*regs, ra)
}
case useKindRNRN1RM:
@@ -341,18 +343,20 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
*regs = append(*regs, rm)
}
case useKindAMode:
- if amodeRN := i.amode.rn; amodeRN.Valid() {
+ amode := i.getAmode()
+ if amodeRN := amode.rn; amodeRN.Valid() {
*regs = append(*regs, amodeRN)
}
- if amodeRM := i.amode.rm; amodeRM.Valid() {
+ if amodeRM := amode.rm; amodeRM.Valid() {
*regs = append(*regs, amodeRM)
}
case useKindRNAMode:
*regs = append(*regs, i.rn.reg())
- if amodeRN := i.amode.rn; amodeRN.Valid() {
+ amode := i.getAmode()
+ if amodeRN := amode.rn; amodeRN.Valid() {
*regs = append(*regs, amodeRN)
}
- if amodeRM := i.amode.rm; amodeRM.Valid() {
+ if amodeRM := amode.rm; amodeRM.Valid() {
*regs = append(*regs, amodeRM)
}
case useKindCond:
@@ -374,7 +378,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {
case useKindRDRewrite:
*regs = append(*regs, i.rn.reg())
*regs = append(*regs, i.rm.reg())
- *regs = append(*regs, i.rd.reg())
+ *regs = append(*regs, i.rd)
default:
panic(fmt.Sprintf("useKind for %v not defined", i))
}
@@ -408,8 +412,8 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) {
i.rm = i.rm.assignReg(reg)
}
} else {
- if rd := i.rd.reg(); rd.Valid() {
- i.rd = i.rd.assignReg(reg)
+ if rd := i.rd; rd.Valid() {
+ i.rd = reg
}
}
case useKindRNRN1RM:
@@ -435,32 +439,36 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) {
i.rm = i.rm.assignReg(reg)
}
} else {
- if ra := i.ra.reg(); ra.Valid() {
- i.ra = i.ra.assignReg(reg)
+ if ra := regalloc.VReg(i.u2); ra.Valid() {
+ i.u2 = uint64(reg)
}
}
case useKindAMode:
if index == 0 {
- if amodeRN := i.amode.rn; amodeRN.Valid() {
- i.amode.rn = reg
+ amode := i.getAmode()
+ if amodeRN := amode.rn; amodeRN.Valid() {
+ amode.rn = reg
}
} else {
- if amodeRM := i.amode.rm; amodeRM.Valid() {
- i.amode.rm = reg
+ amode := i.getAmode()
+ if amodeRM := amode.rm; amodeRM.Valid() {
+ amode.rm = reg
}
}
case useKindRNAMode:
if index == 0 {
i.rn = i.rn.assignReg(reg)
} else if index == 1 {
- if amodeRN := i.amode.rn; amodeRN.Valid() {
- i.amode.rn = reg
+ amode := i.getAmode()
+ if amodeRN := amode.rn; amodeRN.Valid() {
+ amode.rn = reg
} else {
panic("BUG")
}
} else {
- if amodeRM := i.amode.rm; amodeRM.Valid() {
- i.amode.rm = reg
+ amode := i.getAmode()
+ if amodeRM := amode.rm; amodeRM.Valid() {
+ amode.rm = reg
} else {
panic("BUG")
}
@@ -503,35 +511,35 @@ func (i *instruction) callFuncRef() ssa.FuncRef {
}
// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false)
-func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
+func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) {
i.kind = movZ
- i.rd = operandNR(dst)
+ i.rd = dst
i.u1 = imm
- i.u2 = shift
+ i.u2 = uint64(shift)
if dst64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false)
-func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
+func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) {
i.kind = movK
- i.rd = operandNR(dst)
+ i.rd = dst
i.u1 = imm
- i.u2 = shift
+ i.u2 = uint64(shift)
if dst64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
// shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false)
-func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) {
+func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) {
i.kind = movN
- i.rd = operandNR(dst)
+ i.rd = dst
i.u1 = imm
- i.u2 = shift
+ i.u2 = uint64(shift)
if dst64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
@@ -553,21 +561,21 @@ func (i *instruction) asRet() {
i.kind = ret
}
-func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) {
+func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode *addressMode) {
i.kind = storeP64
i.rn = operandNR(src1)
i.rm = operandNR(src2)
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) {
+func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode *addressMode) {
i.kind = loadP64
i.rn = operandNR(src1)
i.rm = operandNR(src2)
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) {
+func (i *instruction) asStore(src operand, amode *addressMode, sizeInBits byte) {
switch sizeInBits {
case 8:
i.kind = store8
@@ -589,10 +597,10 @@ func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) {
i.kind = fpuStore128
}
i.rn = src
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) {
+func (i *instruction) asSLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) {
switch sizeInBits {
case 8:
i.kind = sLoad8
@@ -604,10 +612,10 @@ func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) {
panic("BUG")
}
i.rd = dst
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) {
+func (i *instruction) asULoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) {
switch sizeInBits {
case 8:
i.kind = uLoad8
@@ -619,10 +627,10 @@ func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) {
i.kind = uLoad64
}
i.rd = dst
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) {
+func (i *instruction) asFpuLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) {
switch sizeInBits {
case 32:
i.kind = fpuLoad32
@@ -632,10 +640,18 @@ func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte)
i.kind = fpuLoad128
}
i.rd = dst
- i.amode = amode
+ i.setAmode(amode)
}
-func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) {
+func (i *instruction) getAmode() *addressMode {
+ return wazevoapi.PtrFromUintptr[addressMode](uintptr(i.u1))
+}
+
+func (i *instruction) setAmode(a *addressMode) {
+ i.u1 = uint64(uintptr(unsafe.Pointer(a)))
+}
+
+func (i *instruction) asVecLoad1R(rd regalloc.VReg, rn operand, arr vecArrangement) {
// NOTE: currently only has support for no-offset loads, though it is suspicious that
// we would need to support offset load (that is only available for post-index).
i.kind = vecLoad1R
@@ -646,32 +662,32 @@ func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) {
func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) {
i.kind = cSet
- i.rd = operandNR(rd)
+ i.rd = rd
i.u1 = uint64(c)
if mask {
i.u2 = 1
}
}
-func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) {
+func (i *instruction) asCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) {
i.kind = cSel
i.rd = rd
i.rn = rn
i.rm = rm
i.u1 = uint64(c)
if _64bit {
- i.u3 = 1
+ i.u2 = 1
}
}
-func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) {
+func (i *instruction) asFpuCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) {
i.kind = fpuCSel
i.rd = rd
i.rn = rn
i.rm = rm
i.u1 = uint64(c)
if _64bit {
- i.u3 = 1
+ i.u2 = 1
}
}
@@ -691,7 +707,7 @@ func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targetIndex, tar
}
func (i *instruction) brTableSequenceOffsetsResolved() {
- i.u3 = 1 // indicate that the offsets are resolved, for debugging.
+ i.rm.data = 1 // indicate that the offsets are resolved, for debugging.
}
func (i *instruction) brLabel() label {
@@ -701,7 +717,7 @@ func (i *instruction) brLabel() label {
// brOffsetResolved is called when the target label is resolved.
func (i *instruction) brOffsetResolve(offset int64) {
i.u2 = uint64(offset)
- i.u3 = 1 // indicate that the offset is resolved, for debugging.
+ i.rm.data = 1 // indicate that the offset is resolved, for debugging.
}
func (i *instruction) brOffset() int64 {
@@ -714,7 +730,7 @@ func (i *instruction) asCondBr(c cond, target label, is64bit bool) {
i.u1 = c.asUint64()
i.u2 = uint64(target)
if is64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
@@ -728,17 +744,17 @@ func (i *instruction) condBrLabel() label {
// condBrOffsetResolve is called when the target label is resolved.
func (i *instruction) condBrOffsetResolve(offset int64) {
- i.rd.data = uint64(offset)
- i.rd.data2 = 1 // indicate that the offset is resolved, for debugging.
+ i.rn.data = uint64(offset)
+ i.rn.data2 = 1 // indicate that the offset is resolved, for debugging.
}
// condBrOffsetResolved returns true if condBrOffsetResolve is already called.
func (i *instruction) condBrOffsetResolved() bool {
- return i.rd.data2 == 1
+ return i.rn.data2 == 1
}
func (i *instruction) condBrOffset() int64 {
- return int64(i.rd.data)
+ return int64(i.rn.data)
}
func (i *instruction) condBrCond() cond {
@@ -746,33 +762,33 @@ func (i *instruction) condBrCond() cond {
}
func (i *instruction) condBr64bit() bool {
- return i.u3 == 1
+ return i.u2&(1<<32) != 0
}
func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) {
i.kind = loadFpuConst32
i.u1 = raw
- i.rd = operandNR(rd)
+ i.rd = rd
}
func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) {
i.kind = loadFpuConst64
i.u1 = raw
- i.rd = operandNR(rd)
+ i.rd = rd
}
func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) {
i.kind = loadFpuConst128
i.u1 = lo
i.u2 = hi
- i.rd = operandNR(rd)
+ i.rd = rd
}
func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) {
i.kind = fpuCmp
i.rn, i.rm = rn, rm
if is64bit {
- i.u3 = 1
+ i.u1 = 1
}
}
@@ -783,12 +799,12 @@ func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, i
i.u1 = uint64(c)
i.u2 = uint64(flag)
if is64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
// asALU sets up a basic ALU instruction.
-func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
+func (i *instruction) asALU(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) {
switch rm.kind {
case operandKindNR:
i.kind = aluRRR
@@ -804,22 +820,22 @@ func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
i.u1 = uint64(aluOp)
i.rd, i.rn, i.rm = rd, rn, rm
if dst64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
// asALURRRR sets up a four-register ALU instruction.
-func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) {
+func (i *instruction) asALURRRR(aluOp aluOp, rd regalloc.VReg, rn, rm operand, ra regalloc.VReg, dst64bit bool) {
i.kind = aluRRRR
i.u1 = uint64(aluOp)
- i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra
+ i.rd, i.rn, i.rm, i.u2 = rd, rn, rm, uint64(ra)
if dst64bit {
- i.u3 = 1
+ i.u1 |= 1 << 32
}
}
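
In the four-operand form above, the extra register ra now rides in u2 as a plain integer while the 64-bit flag shares u1 with the op code in the upper 32 bits; the String and encode hunks below read them back via regalloc.VReg(i.u2) and i.u1>>32. A toy round-trip sketch (vReg stands in for regalloc.VReg, which the uint64 conversions in the diff imply is integer-backed):

package main

import "fmt"

// vReg stands in for regalloc.VReg.
type vReg uint64

func main() {
	const op = 3 // some aluOp value
	ra := vReg(42)

	u1 := uint64(op) | 1<<32 // op in the low half, dst64bit flag at bit 32
	u2 := uint64(ra)         // fourth register operand stored directly

	fmt.Println(uint32(u1), u1>>32 == 1) // 3 true
	fmt.Println(vReg(u2) == ra)          // true
}
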
// asALUShift sets up a shift-based ALU instruction.
-func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {
+func (i *instruction) asALUShift(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) {
switch rm.kind {
case operandKindNR:
i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands.
@@ -831,17 +847,17 @@ func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool)
i.u1 = uint64(aluOp)
i.rd, i.rn, i.rm = rd, rn, rm
if dst64bit {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) {
i.kind = aluRRBitmaskImm
i.u1 = uint64(aluOp)
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
i.u2 = imm
if dst64bit {
- i.u3 = 1
+ i.u1 |= 1 << 32
}
}
@@ -852,76 +868,76 @@ func (i *instruction) asMovToFPSR(rn regalloc.VReg) {
func (i *instruction) asMovFromFPSR(rd regalloc.VReg) {
i.kind = movFromFPSR
- i.rd = operandNR(rd)
+ i.rd = rd
}
func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) {
i.kind = bitRR
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
i.u1 = uint64(bitOp)
if is64bit {
i.u2 = 1
}
}
-func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) {
+func (i *instruction) asFpuRRR(op fpuBinOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) {
i.kind = fpuRRR
i.u1 = uint64(op)
i.rd, i.rn, i.rm = rd, rn, rm
if dst64bit {
- i.u3 = 1
+ i.u2 = 1
}
}
-func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) {
+func (i *instruction) asFpuRR(op fpuUniOp, rd regalloc.VReg, rn operand, dst64bit bool) {
i.kind = fpuRR
i.u1 = uint64(op)
i.rd, i.rn = rd, rn
if dst64bit {
- i.u3 = 1
+ i.u2 = 1
}
}
func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) {
i.kind = extend
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
i.u1 = uint64(fromBits)
i.u2 = uint64(toBits)
if signed {
- i.u3 = 1
+ i.u2 |= 1 << 32
}
}
func (i *instruction) asMove32(rd, rn regalloc.VReg) {
i.kind = mov32
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
}
func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction {
i.kind = mov64
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
return i
}
func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) {
i.kind = fpuMov64
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
}
func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction {
i.kind = fpuMov128
- i.rn, i.rd = operandNR(rn), operandNR(rd)
+ i.rn, i.rd = operandNR(rn), rd
return i
}
-func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) {
+func (i *instruction) asMovToVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) {
i.kind = movToVec
i.rd = rd
i.rn = rn
i.u1, i.u2 = uint64(arr), uint64(index)
}
-func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) {
+func (i *instruction) asMovFromVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex, signed bool) {
if signed {
i.kind = movFromVecSigned
} else {
@@ -932,48 +948,48 @@ func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vec
i.u1, i.u2 = uint64(arr), uint64(index)
}
-func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) {
+func (i *instruction) asVecDup(rd regalloc.VReg, rn operand, arr vecArrangement) {
i.kind = vecDup
i.u1 = uint64(arr)
i.rn, i.rd = rn, rd
}
-func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) {
+func (i *instruction) asVecDupElement(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) {
i.kind = vecDupElement
i.u1 = uint64(arr)
i.rn, i.rd = rn, rd
i.u2 = uint64(index)
}
-func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) {
+func (i *instruction) asVecExtract(rd regalloc.VReg, rn, rm operand, arr vecArrangement, index uint32) {
i.kind = vecExtract
i.u1 = uint64(arr)
i.rn, i.rm, i.rd = rn, rm, rd
i.u2 = uint64(index)
}
-func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) {
+func (i *instruction) asVecMovElement(rd regalloc.VReg, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) {
i.kind = vecMovElement
i.u1 = uint64(arr)
- i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex)
+ i.u2 = uint64(rdIndex) | uint64(rnIndex)<<32
i.rn, i.rd = rn, rd
}
-func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) {
+func (i *instruction) asVecMisc(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) {
i.kind = vecMisc
i.u1 = uint64(op)
i.rn, i.rd = rn, rd
i.u2 = uint64(arr)
}
-func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) {
+func (i *instruction) asVecLanes(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) {
i.kind = vecLanes
i.u1 = uint64(op)
i.rn, i.rd = rn, rd
i.u2 = uint64(arr)
}
-func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction {
+func (i *instruction) asVecShiftImm(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction {
i.kind = vecShiftImm
i.u1 = uint64(op)
i.rn, i.rm, i.rd = rn, rm, rd
@@ -981,7 +997,7 @@ func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrange
return i
}
-func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) {
+func (i *instruction) asVecTbl(nregs byte, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
switch nregs {
case 0, 1:
i.kind = vecTbl
@@ -1000,14 +1016,14 @@ func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangemen
i.u2 = uint64(arr)
}
-func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) {
+func (i *instruction) asVecPermute(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
i.kind = vecPermute
i.u1 = uint64(op)
i.rn, i.rm, i.rd = rn, rm, rd
i.u2 = uint64(arr)
}
-func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction {
+func (i *instruction) asVecRRR(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction {
i.kind = vecRRR
i.u1 = uint64(op)
i.rn, i.rd, i.rm = rn, rd, rm
@@ -1017,7 +1033,7 @@ func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement)
// asVecRRRRewrite encodes a vector instruction that rewrites the destination register.
// IMPORTANT: the destination register must already be defined before this instruction.
-func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) {
+func (i *instruction) asVecRRRRewrite(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
i.kind = vecRRRRewrite
i.u1 = uint64(op)
i.rn, i.rd, i.rm = rn, rd, rm
@@ -1033,8 +1049,8 @@ func (i *instruction) IsCopy() bool {
// String implements fmt.Stringer.
func (i *instruction) String() (str string) {
- is64SizeBitToSize := func(u3 uint64) byte {
- if u3 == 0 {
+ is64SizeBitToSize := func(v uint64) byte {
+ if v == 0 {
return 32
}
return 64
@@ -1049,46 +1065,46 @@ func (i *instruction) String() (str string) {
str = "nop0"
}
case aluRRR:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size),
+ formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size),
i.rm.format(size))
case aluRRRR:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u1 >> 32)
str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size))
+ formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(regalloc.VReg(i.u2), size))
case aluRRImm12:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size))
+ formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size))
case aluRRBitmaskImm:
- size := is64SizeBitToSize(i.u3)
- rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size)
+ size := is64SizeBitToSize(i.u1 >> 32)
+ rd, rn := formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size)
if size == 32 {
str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2))
} else {
str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2)
}
case aluRRImmShift:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("%s %s, %s, %#x",
aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
i.rm.shiftImm(),
)
case aluRRRShift:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("%s %s, %s, %s",
aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
i.rm.format(size),
)
case aluRRRExtend:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
// Regardless of the source size, the register is formatted in 32-bit.
i.rm.format(32),
@@ -1097,57 +1113,57 @@ func (i *instruction) String() (str string) {
size := is64SizeBitToSize(i.u2)
str = fmt.Sprintf("%s %s, %s",
bitOp(i.u1),
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
)
case uLoad8:
- str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case sLoad8:
- str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case uLoad16:
- str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case sLoad16:
- str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case uLoad32:
- str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case sLoad32:
- str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case uLoad64:
- str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
+ str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64))
case store8:
- str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8))
+ str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(8))
case store16:
- str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16))
+ str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(16))
case store32:
- str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(32))
case store64:
- str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64))
+ str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64))
case storeP64:
str = fmt.Sprintf("stp %s, %s, %s",
- formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
+ formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64))
case loadP64:
str = fmt.Sprintf("ldp %s, %s, %s",
- formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
+ formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64))
case mov64:
str = fmt.Sprintf("mov %s, %s",
- formatVRegSized(i.rd.nr(), 64),
+ formatVRegSized(i.rd, 64),
formatVRegSized(i.rn.nr(), 64))
case mov32:
- str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32))
+ str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd, 32), formatVRegSized(i.rn.nr(), 32))
case movZ:
- size := is64SizeBitToSize(i.u3)
- str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
+ size := is64SizeBitToSize(i.u2 >> 32)
+ str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)
case movN:
- size := is64SizeBitToSize(i.u3)
- str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
+ size := is64SizeBitToSize(i.u2 >> 32)
+ str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)
case movK:
- size := is64SizeBitToSize(i.u3)
- str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
+ size := is64SizeBitToSize(i.u2 >> 32)
+ str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)
case extend:
fromBits, toBits := byte(i.u1), byte(i.u2)
var signedStr string
- if i.u3 == 1 {
+ if i.u2>>32 == 1 {
signedStr = "s"
} else {
signedStr = "u"
@@ -1161,39 +1177,39 @@ func (i *instruction) String() (str string) {
case 32:
fromStr = "w"
}
- str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32))
+ str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd, toBits), formatVRegSized(i.rn.nr(), 32))
case cSel:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2)
str = fmt.Sprintf("csel %s, %s, %s, %s",
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
formatVRegSized(i.rm.nr(), size),
condFlag(i.u1),
)
case cSet:
if i.u2 != 0 {
- str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1))
+ str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1))
} else {
- str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1))
+ str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1))
}
case cCmpImm:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s",
formatVRegSized(i.rn.nr(), size), i.rm.data,
i.u2&0b1111,
condFlag(i.u1))
case fpuMov64:
str = fmt.Sprintf("mov %s, %s",
- formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement8B, vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone))
case fpuMov128:
str = fmt.Sprintf("mov %s, %s",
- formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement16B, vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone))
case fpuMovFromVec:
panic("TODO")
case fpuRR:
- dstSz := is64SizeBitToSize(i.u3)
+ dstSz := is64SizeBitToSize(i.u2)
srcSz := dstSz
op := fpuUniOp(i.u1)
switch op {
@@ -1203,38 +1219,38 @@ func (i *instruction) String() (str string) {
srcSz = 64
}
str = fmt.Sprintf("%s %s, %s", op.String(),
- formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz))
+ formatVRegSized(i.rd, dstSz), formatVRegSized(i.rn.nr(), srcSz))
case fpuRRR:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2)
str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(),
- formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
+ formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
case fpuRRI:
panic("TODO")
case fpuRRRR:
panic("TODO")
case fpuCmp:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u1)
str = fmt.Sprintf("fcmp %s, %s",
formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
case fpuLoad32:
- str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+ str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
case fpuStore32:
- str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64))
+ str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(64))
case fpuLoad64:
- str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
+ str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64))
case fpuStore64:
- str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64))
+ str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64))
case fpuLoad128:
- str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64))
+ str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 128), i.getAmode().format(64))
case fpuStore128:
- str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64))
+ str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.getAmode().format(64))
case loadFpuConst32:
- str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1)))
+ str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd, 32), math.Float32frombits(uint32(i.u1)))
case loadFpuConst64:
- str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1))
+ str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd, 64), math.Float64frombits(i.u1))
case loadFpuConst128:
str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x",
- formatVRegSized(i.rd.nr(), 128), i.u1, i.u2)
+ formatVRegSized(i.rd, 128), i.u1, i.u2)
case fpuToInt:
var op, src, dst string
if signed := i.u1 == 1; signed {
@@ -1242,15 +1258,15 @@ func (i *instruction) String() (str string) {
} else {
op = "fcvtzu"
}
- if src64 := i.u2 == 1; src64 {
+ if src64 := i.u2&1 != 0; src64 {
src = formatVRegWidthVec(i.rn.nr(), vecArrangementD)
} else {
src = formatVRegWidthVec(i.rn.nr(), vecArrangementS)
}
- if dst64 := i.u3 == 1; dst64 {
- dst = formatVRegSized(i.rd.nr(), 64)
+ if dst64 := i.u2&2 != 0; dst64 {
+ dst = formatVRegSized(i.rd, 64)
} else {
- dst = formatVRegSized(i.rd.nr(), 32)
+ dst = formatVRegSized(i.rd, 32)
}
str = fmt.Sprintf("%s %s, %s", op, dst, src)
@@ -1261,21 +1277,21 @@ func (i *instruction) String() (str string) {
} else {
op = "ucvtf"
}
- if src64 := i.u2 == 1; src64 {
+ if src64 := i.u2&1 != 0; src64 {
src = formatVRegSized(i.rn.nr(), 64)
} else {
src = formatVRegSized(i.rn.nr(), 32)
}
- if dst64 := i.u3 == 1; dst64 {
- dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD)
+ if dst64 := i.u2&2 != 0; dst64 {
+ dst = formatVRegWidthVec(i.rd, vecArrangementD)
} else {
- dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS)
+ dst = formatVRegWidthVec(i.rd, vecArrangementS)
}
str = fmt.Sprintf("%s %s, %s", op, dst, src)
case fpuCSel:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2)
str = fmt.Sprintf("fcsel %s, %s, %s, %s",
- formatVRegSized(i.rd.nr(), size),
+ formatVRegSized(i.rd, size),
formatVRegSized(i.rn.nr(), size),
formatVRegSized(i.rm.nr(), size),
condFlag(i.u1),
@@ -1291,7 +1307,7 @@ func (i *instruction) String() (str string) {
default:
panic("unsupported arrangement " + arr.String())
}
- str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size))
+ str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd, arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size))
case movFromVec, movFromVecSigned:
var size byte
var opcode string
@@ -1315,23 +1331,23 @@ func (i *instruction) String() (str string) {
default:
panic("unsupported arrangement " + arr.String())
}
- str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))
+ str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd, size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))
case vecDup:
str = fmt.Sprintf("dup %s, %s",
- formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone),
formatVRegSized(i.rn.nr(), 64),
)
case vecDupElement:
arr := vecArrangement(i.u1)
str = fmt.Sprintf("dup %s, %s",
- formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+ formatVRegVec(i.rd, arr, vecIndexNone),
formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)),
)
case vecDupFromFpu:
panic("TODO")
case vecExtract:
str = fmt.Sprintf("ext %s, %s, %s, #%d",
- formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone),
formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone),
uint32(i.u2),
@@ -1340,15 +1356,15 @@ func (i *instruction) String() (str string) {
panic("TODO")
case vecMovElement:
str = fmt.Sprintf("mov %s, %s",
- formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)),
- formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)),
+ formatVRegVec(i.rd, vecArrangement(i.u1), vecIndex(i.u2&0xffffffff)),
+ formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u2>>32)),
)
case vecMiscNarrow:
panic("TODO")
case vecRRR, vecRRRRewrite:
str = fmt.Sprintf("%s %s, %s, %s",
vecOp(i.u1),
- formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone),
formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone),
)
@@ -1356,12 +1372,12 @@ func (i *instruction) String() (str string) {
vop := vecOp(i.u1)
if vop == vecOpCmeq0 {
str = fmt.Sprintf("cmeq %s, %s, #0",
- formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
} else {
str = fmt.Sprintf("%s %s, %s",
vop,
- formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
+ formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
}
case vecLanes:
@@ -1379,24 +1395,24 @@ func (i *instruction) String() (str string) {
}
str = fmt.Sprintf("%s %s, %s",
vecOp(i.u1),
- formatVRegWidthVec(i.rd.nr(), destArr),
+ formatVRegWidthVec(i.rd, destArr),
formatVRegVec(i.rn.nr(), arr, vecIndexNone))
case vecShiftImm:
arr := vecArrangement(i.u2)
str = fmt.Sprintf("%s %s, %s, #%d",
vecOp(i.u1),
- formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+ formatVRegVec(i.rd, arr, vecIndexNone),
formatVRegVec(i.rn.nr(), arr, vecIndexNone),
i.rm.shiftImm())
case vecTbl:
arr := vecArrangement(i.u2)
str = fmt.Sprintf("tbl %s, { %s }, %s",
- formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+ formatVRegVec(i.rd, arr, vecIndexNone),
formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone),
formatVRegVec(i.rm.nr(), arr, vecIndexNone))
case vecTbl2:
arr := vecArrangement(i.u2)
- rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr()
+ rd, rn, rm := i.rd, i.rn.nr(), i.rm.nr()
rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType())
str = fmt.Sprintf("tbl %s, { %s, %s }, %s",
formatVRegVec(rd, arr, vecIndexNone),
@@ -1407,13 +1423,13 @@ func (i *instruction) String() (str string) {
arr := vecArrangement(i.u2)
str = fmt.Sprintf("%s %s, %s, %s",
vecOp(i.u1),
- formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+ formatVRegVec(i.rd, arr, vecIndexNone),
formatVRegVec(i.rn.nr(), arr, vecIndexNone),
formatVRegVec(i.rm.nr(), arr, vecIndexNone))
case movToFPSR:
str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64))
case movFromFPSR:
- str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64))
+ str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd, 64))
case call:
str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1))
case callInd:
@@ -1422,15 +1438,15 @@ func (i *instruction) String() (str string) {
str = "ret"
case br:
target := label(i.u1)
- if i.u3 != 0 {
+ if i.rm.data != 0 {
str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String())
} else {
str = fmt.Sprintf("b %s", target.String())
}
case condBr:
- size := is64SizeBitToSize(i.u3)
+ size := is64SizeBitToSize(i.u2 >> 32)
c := cond(i.u1)
- target := label(i.u2)
+ target := label(i.u2 & 0xffffffff)
switch c.kind() {
case condKindRegisterZero:
if !i.condBrOffsetResolved() {
@@ -1456,7 +1472,7 @@ func (i *instruction) String() (str string) {
}
}
case adr:
- str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1))
+ str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd, 64), int64(i.u1))
case brTableSequence:
targetIndex := i.u1
str = fmt.Sprintf("br_table_sequence %s, table_index=%d", formatVRegSized(i.rn.nr(), 64), targetIndex)
@@ -1473,7 +1489,7 @@ func (i *instruction) String() (str string) {
case 1:
m = m + "b"
}
- str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64))
+ str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64))
case atomicCas:
m := "casal"
size := byte(32)
@@ -1485,7 +1501,7 @@ func (i *instruction) String() (str string) {
case 1:
m = m + "b"
}
- str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))
+ str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))
case atomicLoad:
m := "ldar"
size := byte(32)
@@ -1497,7 +1513,7 @@ func (i *instruction) String() (str string) {
case 1:
m = m + "b"
}
- str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64))
+ str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64))
case atomicStore:
m := "stlr"
size := byte(32)
@@ -1517,9 +1533,9 @@ func (i *instruction) String() (str string) {
case emitSourceOffsetInfo:
str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1))
case vecLoad1R:
- str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64))
+ str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64))
case loadConstBlockArg:
- str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd.nr(), 64), i.u1)
+ str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd, 64), i.u1)
default:
panic(i.kind)
}
@@ -1528,26 +1544,26 @@ func (i *instruction) String() (str string) {
func (i *instruction) asAdr(rd regalloc.VReg, offset int64) {
i.kind = adr
- i.rd = operandNR(rd)
+ i.rd = rd
i.u1 = uint64(offset)
}
-func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt operand, size uint64) {
+func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt regalloc.VReg, size uint64) {
i.kind = atomicRmw
- i.rd, i.rn, i.rm = rt, rn, rs
+ i.rd, i.rn, i.rm = rt, operandNR(rn), operandNR(rs)
i.u1 = uint64(op)
i.u2 = size
}
-func (i *instruction) asAtomicCas(rn, rs, rt operand, size uint64) {
+func (i *instruction) asAtomicCas(rn, rs, rt regalloc.VReg, size uint64) {
i.kind = atomicCas
- i.rm, i.rn, i.rd = rt, rn, rs
+ i.rm, i.rn, i.rd = operandNR(rt), operandNR(rn), rs
i.u2 = size
}
-func (i *instruction) asAtomicLoad(rn, rt operand, size uint64) {
+func (i *instruction) asAtomicLoad(rn, rt regalloc.VReg, size uint64) {
i.kind = atomicLoad
- i.rn, i.rd = rn, rt
+ i.rn, i.rd = operandNR(rn), rt
i.u2 = size
}
@@ -1755,12 +1771,12 @@ func (i *instruction) asLoadConstBlockArg(v uint64, typ ssa.Type, dst regalloc.V
i.kind = loadConstBlockArg
i.u1 = v
i.u2 = uint64(typ)
- i.rd = operandNR(dst)
+ i.rd = dst
return i
}
func (i *instruction) loadConstBlockArgData() (v uint64, typ ssa.Type, dst regalloc.VReg) {
- return i.u1, ssa.Type(i.u2), i.rd.nr()
+ return i.u1, ssa.Type(i.u2), i.rd
}
func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction {
@@ -1778,7 +1794,7 @@ func (i *instruction) asUDF() *instruction {
return i
}
-func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) {
+func (i *instruction) asFpuToInt(rd regalloc.VReg, rn operand, rdSigned, src64bit, dst64bit bool) {
i.kind = fpuToInt
i.rn = rn
i.rd = rd
@@ -1789,11 +1805,11 @@ func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bo
i.u2 = 1
}
if dst64bit {
- i.u3 = 1
+ i.u2 |= 2
}
}
-func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) {
+func (i *instruction) asIntToFpu(rd regalloc.VReg, rn operand, rnSigned, src64bit, dst64bit bool) {
i.kind = intToFpu
i.rn = rn
i.rd = rd
@@ -1804,7 +1820,7 @@ func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bo
i.u2 = 1
}
if dst64bit {
- i.u3 = 1
+ i.u2 |= 2
}
}
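
Both conversion setters above now pack their two size flags into u2: bit 0 marks a 64-bit source and bit 1 a 64-bit destination, matching the reads in encodeCnvBetweenFloatInt later in this diff. A standalone sketch of the same layout (illustrative only, not the real instruction type):

package main

import "fmt"

// flags mirrors the bit layout asFpuToInt/asIntToFpu now use:
// bit 0 = 64-bit source, bit 1 = 64-bit destination.
func flags(src64bit, dst64bit bool) uint64 {
	var u2 uint64
	if src64bit {
		u2 |= 1
	}
	if dst64bit {
		u2 |= 2
	}
	return u2
}

func main() {
	u2 := flags(false, true)
	fmt.Println(u2&1 != 0) // false: 32-bit source
	fmt.Println(u2&2 != 0) // true: 64-bit destination
}
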
@@ -1817,7 +1833,7 @@ func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction {
// aluOp determines the type of ALU operation. Instructions whose kind is one of
// aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend
// would use this type.
-type aluOp int
+type aluOp uint32
func (a aluOp) String() string {
switch a {
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
index 227a96474..f0ede2d6a 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
@@ -44,12 +44,12 @@ func (i *instruction) encode(m *machine) {
case callInd:
c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true))
case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128:
- c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode))
+ c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], *i.getAmode()))
case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128:
- c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode))
+ c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.RealReg()], *i.getAmode()))
case vecLoad1R:
c.Emit4Bytes(encodeVecLoad1R(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(i.u1)))
case condBr:
@@ -75,22 +75,22 @@ func (i *instruction) encode(m *machine) {
panic("BUG")
}
case movN:
- c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+ c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
case movZ:
- c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+ c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
case movK:
- c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+ c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
case mov32:
- to, from := i.rd.realReg(), i.rn.realReg()
+ to, from := i.rd.RealReg(), i.rn.realReg()
c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to]))
case mov64:
- to, from := i.rd.realReg(), i.rn.realReg()
+ to, from := i.rd.RealReg(), i.rn.realReg()
toIsSp := to == sp
fromIsSp := from == sp
c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp))
case loadP64, storeP64:
rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
- amode := i.amode
+ amode := i.getAmode()
rn := regNumberInEncoding[amode.rn.RealReg()]
var pre bool
switch amode.kind {
@@ -102,21 +102,21 @@ func (i *instruction) encode(m *machine) {
}
c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm))
case loadFpuConst32:
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
if i.u1 == 0 {
c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B))
} else {
encodeLoadFpuConst32(c, rd, i.u1)
}
case loadFpuConst64:
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
if i.u1 == 0 {
c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B))
} else {
- encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.realReg()], i.u1)
+ encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.RealReg()], i.u1)
}
case loadFpuConst128:
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
lo, hi := i.u1, i.u2
if lo == 0 && hi == 0 {
c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B))
@@ -126,35 +126,35 @@ func (i *instruction) encode(m *machine) {
case aluRRRR:
c.Emit4Bytes(encodeAluRRRR(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
- regNumberInEncoding[i.ra.realReg()],
- uint32(i.u3),
+ regNumberInEncoding[regalloc.VReg(i.u2).RealReg()],
+ uint32(i.u1>>32),
))
case aluRRImmShift:
c.Emit4Bytes(encodeAluRRImm(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
uint32(i.rm.shiftImm()),
- uint32(i.u3),
+ uint32(i.u2>>32),
))
case aluRRR:
rn := i.rn.realReg()
c.Emit4Bytes(encodeAluRRR(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[rn],
regNumberInEncoding[i.rm.realReg()],
- i.u3 == 1,
+ i.u2>>32 == 1,
rn == sp,
))
case aluRRRExtend:
rm, exo, to := i.rm.er()
c.Emit4Bytes(encodeAluRRRExtend(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[rm.RealReg()],
exo,
@@ -164,25 +164,25 @@ func (i *instruction) encode(m *machine) {
r, amt, sop := i.rm.sr()
c.Emit4Bytes(encodeAluRRRShift(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[r.RealReg()],
uint32(amt),
sop,
- i.u3 == 1,
+ i.u2>>32 == 1,
))
case aluRRBitmaskImm:
c.Emit4Bytes(encodeAluBitmaskImmediate(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
i.u2,
- i.u3 == 1,
+ i.u1>>32 == 1,
))
case bitRR:
c.Emit4Bytes(encodeBitRR(
bitOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
uint32(i.u2)),
)
@@ -190,22 +190,22 @@ func (i *instruction) encode(m *machine) {
imm12, shift := i.rm.imm12()
c.Emit4Bytes(encodeAluRRImm12(
aluOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
imm12, shift,
- i.u3 == 1,
+ i.u2>>32 == 1,
))
case fpuRRR:
c.Emit4Bytes(encodeFpuRRR(
fpuBinOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
- i.u3 == 1,
+ i.u2 == 1,
))
case fpuMov64, fpuMov128:
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register--
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
rn := regNumberInEncoding[i.rn.realReg()]
var q uint32
if kind == fpuMov128 {
@@ -213,7 +213,7 @@ func (i *instruction) encode(m *machine) {
}
c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd)
case cSet:
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
cf := condFlag(i.u1)
if i.u2 == 1 {
// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV-
@@ -225,12 +225,12 @@ func (i *instruction) encode(m *machine) {
c.Emit4Bytes(0b1001101010011111<<16 | uint32(cf.invert())<<12 | 0b111111<<5 | rd)
}
case extend:
- c.Emit4Bytes(encodeExtend(i.u3 == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.realReg()], regNumberInEncoding[i.rn.realReg()]))
+ c.Emit4Bytes(encodeExtend((i.u2>>32) == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()]))
case fpuCmp:
// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en
rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
var ftype uint32
- if i.u3 == 1 {
+ if i.u1 == 1 {
ftype = 0b01 // double precision.
}
c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5)
@@ -242,34 +242,34 @@ func (i *instruction) encode(m *machine) {
c.Emit4Bytes(0)
}
case adr:
- c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1)))
+ c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.RealReg()], uint32(i.u1)))
case cSel:
c.Emit4Bytes(encodeConditionalSelect(
kind,
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
condFlag(i.u1),
- i.u3 == 1,
+ i.u2 == 1,
))
case fpuCSel:
c.Emit4Bytes(encodeFpuCSel(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
condFlag(i.u1),
- i.u3 == 1,
+ i.u2 == 1,
))
case movToVec:
c.Emit4Bytes(encodeMoveToVec(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(byte(i.u1)),
vecIndex(i.u2),
))
case movFromVec, movFromVecSigned:
c.Emit4Bytes(encodeMoveFromVec(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(byte(i.u1)),
vecIndex(i.u2),
@@ -277,18 +277,18 @@ func (i *instruction) encode(m *machine) {
))
case vecDup:
c.Emit4Bytes(encodeVecDup(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(byte(i.u1))))
case vecDupElement:
c.Emit4Bytes(encodeVecDupElement(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(byte(i.u1)),
vecIndex(i.u2)))
case vecExtract:
c.Emit4Bytes(encodeVecExtract(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
vecArrangement(byte(i.u1)),
@@ -296,35 +296,35 @@ func (i *instruction) encode(m *machine) {
case vecPermute:
c.Emit4Bytes(encodeVecPermute(
vecOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
vecArrangement(byte(i.u2))))
case vecMovElement:
c.Emit4Bytes(encodeVecMovElement(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(i.u1),
- uint32(i.u2), uint32(i.u3),
+ uint32(i.u2), uint32(i.u2>>32),
))
case vecMisc:
c.Emit4Bytes(encodeAdvancedSIMDTwoMisc(
vecOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(i.u2),
))
case vecLanes:
c.Emit4Bytes(encodeVecLanes(
vecOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
vecArrangement(i.u2),
))
case vecShiftImm:
c.Emit4Bytes(encodeVecShiftImm(
vecOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
uint32(i.rm.shiftImm()),
vecArrangement(i.u2),
@@ -332,7 +332,7 @@ func (i *instruction) encode(m *machine) {
case vecTbl:
c.Emit4Bytes(encodeVecTbl(
1,
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
vecArrangement(i.u2)),
@@ -340,7 +340,7 @@ func (i *instruction) encode(m *machine) {
case vecTbl2:
c.Emit4Bytes(encodeVecTbl(
2,
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
vecArrangement(i.u2)),
@@ -353,9 +353,9 @@ func (i *instruction) encode(m *machine) {
case fpuRR:
c.Emit4Bytes(encodeFloatDataOneSource(
fpuUniOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
- i.u3 == 1,
+ i.u2 == 1,
))
case vecRRR:
if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal {
@@ -365,14 +365,14 @@ func (i *instruction) encode(m *machine) {
case vecRRRRewrite:
c.Emit4Bytes(encodeVecRRR(
vecOp(i.u1),
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
regNumberInEncoding[i.rm.realReg()],
vecArrangement(i.u2),
))
case cCmpImm:
// Conditional compare (immediate) in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
- sf := uint32(i.u3 & 0b1)
+ sf := uint32((i.u2 >> 32) & 0b1)
nzcv := uint32(i.u2 & 0b1111)
cond := uint32(condFlag(i.u1))
imm := uint32(i.rm.data & 0b11111)
@@ -381,7 +381,7 @@ func (i *instruction) encode(m *machine) {
sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv,
)
case movFromFPSR:
- rt := regNumberInEncoding[i.rd.realReg()]
+ rt := regNumberInEncoding[i.rd.RealReg()]
c.Emit4Bytes(encodeSystemRegisterMove(rt, true))
case movToFPSR:
rt := regNumberInEncoding[i.rn.realReg()]
@@ -390,13 +390,13 @@ func (i *instruction) encode(m *machine) {
c.Emit4Bytes(encodeAtomicRmw(
atomicRmwOp(i.u1),
regNumberInEncoding[i.rm.realReg()],
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rn.realReg()],
uint32(i.u2),
))
case atomicCas:
c.Emit4Bytes(encodeAtomicCas(
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
regNumberInEncoding[i.rm.realReg()],
regNumberInEncoding[i.rn.realReg()],
uint32(i.u2),
@@ -404,7 +404,7 @@ func (i *instruction) encode(m *machine) {
case atomicLoad:
c.Emit4Bytes(encodeAtomicLoadStore(
regNumberInEncoding[i.rn.realReg()],
- regNumberInEncoding[i.rd.realReg()],
+ regNumberInEncoding[i.rd.RealReg()],
uint32(i.u2),
1,
))
@@ -810,7 +810,7 @@ func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32
// encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
func encodeCnvBetweenFloatInt(i *instruction) uint32 {
- rd := regNumberInEncoding[i.rd.realReg()]
+ rd := regNumberInEncoding[i.rd.RealReg()]
rn := regNumberInEncoding[i.rn.realReg()]
var opcode uint32
@@ -822,8 +822,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 {
rmode = 0b00
signed := i.u1 == 1
- src64bit := i.u2 == 1
- dst64bit := i.u3 == 1
+ src64bit := i.u2&1 != 0
+ dst64bit := i.u2&2 != 0
if signed {
opcode = 0b010
} else {
@@ -841,8 +841,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 {
rmode = 0b11
signed := i.u1 == 1
- src64bit := i.u2 == 1
- dst64bit := i.u3 == 1
+ src64bit := i.u2&1 != 0
+ dst64bit := i.u2&2 != 0
if signed {
opcode = 0b000
@@ -1787,13 +1787,13 @@ func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) {
// https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
//
// "shift" must have been divided by 16 at this point.
-func encodeMoveWideImmediate(opc uint32, rd uint32, imm, shift, _64bit uint64) (ret uint32) {
+func encodeMoveWideImmediate(opc uint32, rd uint32, imm uint64, shift, _64bit uint32) (ret uint32) {
ret = rd
ret |= uint32(imm&0xffff) << 5
- ret |= (uint32(shift)) << 21
+ ret |= (shift) << 21
ret |= 0b100101 << 23
ret |= opc << 29
- ret |= uint32(_64bit) << 31
+ ret |= _64bit << 31
return
}
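
As the comment says, callers pass the shift already divided by 16, so a shift of 1 means "lsl 16". A standalone re-derivation of the same bit layout (toy code, assuming x0 as the destination register):

package main

import "fmt"

// moveWideImm re-derives the bit layout of encodeMoveWideImmediate above:
// opc 0b10 selects MOVZ, and shift is the 16-bit chunk index
// (i.e. already divided by 16, so 1 means "lsl 16").
func moveWideImm(opc, rd uint32, imm uint64, shift, is64bit uint32) uint32 {
	ret := rd
	ret |= uint32(imm&0xffff) << 5
	ret |= shift << 21
	ret |= 0b100101 << 23
	ret |= opc << 29
	ret |= is64bit << 31
	return ret
}

func main() {
	// movz x0, #0xbeef, lsl 16 => 0xd2b7dde0
	fmt.Printf("%#08x\n", moveWideImm(0b10, 0, 0xbeef, 1, 1))
}
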
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go
index 698b382d4..6c6824fb0 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go
@@ -284,18 +284,18 @@ func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
- instr.asMOVZ(dst, v, uint64(shift), dst64)
+ instr.asMOVZ(dst, v, uint32(shift), dst64)
m.insert(instr)
}
func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
- instr.asMOVK(dst, v, uint64(shift), dst64)
+ instr.asMOVK(dst, v, uint32(shift), dst64)
m.insert(instr)
}
func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
instr := m.allocateInstr()
- instr.asMOVN(dst, v, uint64(shift), dst64)
+ instr.asMOVN(dst, v, uint32(shift), dst64)
m.insert(instr)
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
index 2bb234e8c..048bf3204 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
@@ -52,11 +52,11 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) {
maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32)
m.lowerConstantI32(maxIndexReg, int32(len(targets)-1))
subs := m.allocateInstr()
- subs.asALU(aluOpSubS, operandNR(xzrVReg), indexOperand, operandNR(maxIndexReg), false)
+ subs.asALU(aluOpSubS, xzrVReg, indexOperand, operandNR(maxIndexReg), false)
m.insert(subs)
csel := m.allocateInstr()
adjustedIndex := m.compiler.AllocateVReg(ssa.TypeI32)
- csel.asCSel(operandNR(adjustedIndex), operandNR(maxIndexReg), indexOperand, hs, false)
+ csel.asCSel(adjustedIndex, operandNR(maxIndexReg), indexOperand, hs, false)
m.insert(csel)
brSequence := m.allocateInstr()
@@ -249,7 +249,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
rc := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerSelectVec(rc, rn, rm, rd)
} else {
m.lowerSelect(c, x, y, instr.Return())
@@ -270,7 +270,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x, ctx := instr.Arg2()
result := instr.Return()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
ctxVReg := m.compiler.VRegOf(ctx)
m.lowerFpuToInt(rd, rn, ctxVReg, true, x.Type() == ssa.TypeF64,
result.Type().Bits() == 64, op == ssa.OpcodeFcvtToSintSat)
@@ -278,7 +278,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x, ctx := instr.Arg2()
result := instr.Return()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
ctxVReg := m.compiler.VRegOf(ctx)
m.lowerFpuToInt(rd, rn, ctxVReg, false, x.Type() == ssa.TypeF64,
result.Type().Bits() == 64, op == ssa.OpcodeFcvtToUintSat)
@@ -286,25 +286,25 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x := instr.Arg()
result := instr.Return()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
m.lowerIntToFpu(rd, rn, true, x.Type() == ssa.TypeI64, result.Type().Bits() == 64)
case ssa.OpcodeFcvtFromUint:
x := instr.Arg()
result := instr.Return()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
m.lowerIntToFpu(rd, rn, false, x.Type() == ssa.TypeI64, result.Type().Bits() == 64)
case ssa.OpcodeFdemote:
v := instr.Arg()
rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
cnt := m.allocateInstr()
cnt.asFpuRR(fpuUniOpCvt64To32, rd, rn, false)
m.insert(cnt)
case ssa.OpcodeFpromote:
v := instr.Arg()
rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
cnt := m.allocateInstr()
cnt.asFpuRR(fpuUniOpCvt32To64, rd, rn, true)
m.insert(cnt)
@@ -343,15 +343,15 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
ctxVReg := m.compiler.VRegOf(ctx)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerIDiv(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSdiv)
case ssa.OpcodeSrem, ssa.OpcodeUrem:
x, y, ctx := instr.Arg3()
ctxVReg := m.compiler.VRegOf(ctx)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
- m.lowerIRem(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem)
+ rd := m.compiler.VRegOf(instr.Return())
+ m.lowerIRem(ctxVReg, rd, rn.nr(), rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem)
case ssa.OpcodeVconst:
result := m.compiler.VRegOf(instr.Return())
lo, hi := instr.VconstData()
@@ -362,7 +362,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x := instr.Arg()
ins := m.allocateInstr()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
ins.asVecMisc(vecOpNot, rd, rn, vecArrangement16B)
m.insert(ins)
case ssa.OpcodeVbxor:
@@ -382,12 +382,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
creg := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone)
- tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmp := m.compiler.AllocateVReg(ssa.TypeV128)
// creg is overwritten by BSL, so we need to move it to the result register before the instruction
// in case it is used somewhere else.
mov := m.allocateInstr()
- mov.asFpuMov128(tmp.nr(), creg.nr())
+ mov.asFpuMov128(tmp, creg.nr())
m.insert(mov)
ins := m.allocateInstr()
@@ -396,7 +396,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
mov2 := m.allocateInstr()
rd := m.compiler.VRegOf(instr.Return())
- mov2.asFpuMov128(rd, tmp.nr())
+ mov2.asFpuMov128(rd, tmp)
m.insert(mov2)
case ssa.OpcodeVanyTrue, ssa.OpcodeVallTrue:
x, lane := instr.ArgWithLane()
@@ -405,12 +405,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
arr = ssaLaneToArrangement(lane)
}
rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerVcheckTrue(op, rm, rd, arr)
case ssa.OpcodeVhighBits:
x, lane := instr.ArgWithLane()
rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
arr := ssaLaneToArrangement(lane)
m.lowerVhighBits(rm, rd, arr)
case ssa.OpcodeVIadd:
@@ -441,9 +441,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
panic("unsupported lane " + lane.String())
}
- widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo, vv, operandShiftImm(0), loArr)
- widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi, vv, operandShiftImm(0), hiArr)
- addp := m.allocateInstr().asVecRRR(vecOpAddp, operandNR(m.compiler.VRegOf(instr.Return())), tmpLo, tmpHi, dstArr)
+ widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo.nr(), vv, operandShiftImm(0), loArr)
+ widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi.nr(), vv, operandShiftImm(0), hiArr)
+ addp := m.allocateInstr().asVecRRR(vecOpAddp, m.compiler.VRegOf(instr.Return()), tmpLo, tmpHi, dstArr)
m.insert(widenLo)
m.insert(widenHi)
m.insert(addp)
@@ -493,7 +493,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
arr := ssaLaneToArrangement(lane)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerVIMul(rd, rn, rm, arr)
case ssa.OpcodeVIabs:
m.lowerVecMisc(vecOpAbs, instr)
@@ -507,7 +507,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
arr := ssaLaneToArrangement(lane)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerVShift(op, rd, rn, rm, arr)
case ssa.OpcodeVSqrt:
m.lowerVecMisc(vecOpFsqrt, instr)
@@ -547,18 +547,18 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x, lane := instr.ArgWithLane()
arr := ssaLaneToArrangement(lane)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerVfpuToInt(rd, rn, arr, op == ssa.OpcodeVFcvtToSintSat)
case ssa.OpcodeVFcvtFromSint, ssa.OpcodeVFcvtFromUint:
x, lane := instr.ArgWithLane()
arr := ssaLaneToArrangement(lane)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerVfpuFromInt(rd, rn, arr, op == ssa.OpcodeVFcvtFromSint)
case ssa.OpcodeSwidenLow, ssa.OpcodeUwidenLow:
x, lane := instr.ArgWithLane()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
var arr vecArrangement
switch lane {
@@ -580,7 +580,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
case ssa.OpcodeSwidenHigh, ssa.OpcodeUwidenHigh:
x, lane := instr.ArgWithLane()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
arr := ssaLaneToArrangement(lane)
@@ -607,9 +607,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
}
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
- tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmp := m.compiler.AllocateVReg(ssa.TypeV128)
loQxtn := m.allocateInstr()
hiQxtn := m.allocateInstr()
@@ -628,7 +628,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
m.insert(hiQxtn)
mov := m.allocateInstr()
- mov.asFpuMov128(rd.nr(), tmp.nr())
+ mov.asFpuMov128(rd, tmp)
m.insert(mov)
case ssa.OpcodeFvpromoteLow:
x, lane := instr.ArgWithLane()
@@ -637,7 +637,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
}
ins := m.allocateInstr()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
ins.asVecMisc(vecOpFcvtl, rd, rn, vecArrangement2S)
m.insert(ins)
case ssa.OpcodeFvdemote:
@@ -647,14 +647,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
}
ins := m.allocateInstr()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
ins.asVecMisc(vecOpFcvtn, rd, rn, vecArrangement2S)
m.insert(ins)
case ssa.OpcodeExtractlane:
x, index, signed, lane := instr.ExtractlaneData()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
mov := m.allocateInstr()
switch lane {
@@ -680,12 +680,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x, y, index, lane := instr.InsertlaneData()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
- tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ rd := m.compiler.VRegOf(instr.Return())
+ tmpReg := m.compiler.AllocateVReg(ssa.TypeV128)
// Initially mov rn to tmp.
mov1 := m.allocateInstr()
- mov1.asFpuMov128(tmpReg.nr(), rn.nr())
+ mov1.asFpuMov128(tmpReg, rn.nr())
m.insert(mov1)
// movToVec and vecMovElement do not clear the remaining bits to zero,
@@ -709,14 +709,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
// Finally mov tmp to rd.
mov3 := m.allocateInstr()
- mov3.asFpuMov128(rd.nr(), tmpReg.nr())
+ mov3.asFpuMov128(rd, tmpReg)
m.insert(mov3)
case ssa.OpcodeSwizzle:
x, y, lane := instr.Arg2WithLane()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
arr := ssaLaneToArrangement(lane)
@@ -729,14 +729,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
x, y, lane1, lane2 := instr.ShuffleData()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
m.lowerShuffle(rd, rn, rm, lane1, lane2)
case ssa.OpcodeSplat:
x, lane := instr.ArgWithLane()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
dup := m.allocateInstr()
switch lane {
@@ -760,12 +760,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
xx, yy := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone),
m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
tmp, tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)), operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
- m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp, xx, yy, vecArrangement8H))
- m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2, xx, yy, vecArrangement8H))
- m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp, tmp, tmp2, vecArrangement4S))
+ m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp.nr(), xx, yy, vecArrangement8H))
+ m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2.nr(), xx, yy, vecArrangement8H))
+ m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp2, vecArrangement4S))
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
- m.insert(m.allocateInstr().asFpuMov128(rd.nr(), tmp.nr()))
+ rd := m.compiler.VRegOf(instr.Return())
+ m.insert(m.allocateInstr().asFpuMov128(rd, tmp.nr()))
case ssa.OpcodeLoadSplat:
ptr, offset, lane := instr.LoadSplatData()
@@ -794,7 +794,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
m.executableContext.FlushPendingInstructions()
}
-func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) {
+func (m *machine) lowerShuffle(rd regalloc.VReg, rn, rm operand, lane1, lane2 uint64) {
// `tbl2` requires 2 consecutive registers, so we arbitrarily pick v29, v30.
vReg, wReg := v29VReg, v30VReg
@@ -822,7 +822,7 @@ func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) {
m.insert(tbl2)
}
-func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangement) {
+func (m *machine) lowerVShift(op ssa.Opcode, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
var modulo byte
switch arr {
case vecArrangement16B:
@@ -847,13 +847,13 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem
if op != ssa.OpcodeVIshl {
   // Negate the amount to make this a right shift.
neg := m.allocateInstr()
- neg.asALU(aluOpSub, rtmp, operandNR(xzrVReg), rtmp, true)
+ neg.asALU(aluOpSub, rtmp.nr(), operandNR(xzrVReg), rtmp, true)
m.insert(neg)
}
// Copy the shift amount into a vector register as sshl/ushl requires it to be there.
dup := m.allocateInstr()
- dup.asVecDup(vtmp, rtmp, arr)
+ dup.asVecDup(vtmp.nr(), rtmp, arr)
m.insert(dup)
if op == ssa.OpcodeVIshl || op == ssa.OpcodeVSshr {
@@ -867,7 +867,7 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem
}
}
-func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangement) {
+func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm operand, rd regalloc.VReg, arr vecArrangement) {
tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
// Special case VallTrue for i64x2.
@@ -878,11 +878,11 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
// cset dst, eq
ins := m.allocateInstr()
- ins.asVecMisc(vecOpCmeq0, tmp, rm, vecArrangement2D)
+ ins.asVecMisc(vecOpCmeq0, tmp.nr(), rm, vecArrangement2D)
m.insert(ins)
addp := m.allocateInstr()
- addp.asVecRRR(vecOpAddp, tmp, tmp, tmp, vecArrangement2D)
+ addp.asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp, vecArrangement2D)
m.insert(addp)
fcmp := m.allocateInstr()
@@ -890,7 +890,7 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
m.insert(fcmp)
cset := m.allocateInstr()
- cset.asCSet(rd.nr(), false, eq)
+ cset.asCSet(rd, false, eq)
m.insert(cset)
return
@@ -900,10 +900,10 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
ins := m.allocateInstr()
if op == ssa.OpcodeVanyTrue {
// umaxp v4?.16b, v2?.16b, v2?.16b
- ins.asVecRRR(vecOpUmaxp, tmp, rm, rm, vecArrangement16B)
+ ins.asVecRRR(vecOpUmaxp, tmp.nr(), rm, rm, vecArrangement16B)
} else {
// uminv d4?, v2?.4s
- ins.asVecLanes(vecOpUminv, tmp, rm, arr)
+ ins.asVecLanes(vecOpUminv, tmp.nr(), rm, arr)
}
m.insert(ins)
@@ -917,15 +917,15 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
m.insert(movv)
fc := m.allocateInstr()
- fc.asCCmpImm(rd, uint64(0), al, 0, true)
+ fc.asCCmpImm(operandNR(rd), uint64(0), al, 0, true)
m.insert(fc)
cset := m.allocateInstr()
- cset.asCSet(rd.nr(), false, ne)
+ cset.asCSet(rd, false, ne)
m.insert(cset)
}
-func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
+func (m *machine) lowerVhighBits(rm operand, rd regalloc.VReg, arr vecArrangement) {
r0 := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
v0 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
v1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
@@ -947,7 +947,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// Right arithmetic shift on the original vector and store the result into v1. So we have:
// v1[i] = 0xff if vi<0, 0 otherwise.
sshr := m.allocateInstr()
- sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(7), vecArrangement16B)
+ sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(7), vecArrangement16B)
m.insert(sshr)
// Load the bit mask into r0.
@@ -958,7 +958,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// dup r0 to v0.
dup := m.allocateInstr()
- dup.asVecDup(v0, r0, vecArrangement2D)
+ dup.asVecDup(v0.nr(), r0, vecArrangement2D)
m.insert(dup)
// Lane-wise logical AND with the bit mask, meaning that we have
@@ -967,23 +967,23 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// Below, we use the following notation:
// wi := (1 << i) if vi<0, 0 otherwise.
and := m.allocateInstr()
- and.asVecRRR(vecOpAnd, v1, v1, v0, vecArrangement16B)
+ and.asVecRRR(vecOpAnd, v1.nr(), v1, v0, vecArrangement16B)
m.insert(and)
// Swap the lower and higher 8 byte elements, and write it into v0, meaning that we have
// v0[i] = w(i+8) if i < 8, w(i-8) otherwise.
ext := m.allocateInstr()
- ext.asVecExtract(v0, v1, v1, vecArrangement16B, uint32(8))
+ ext.asVecExtract(v0.nr(), v1, v1, vecArrangement16B, uint32(8))
m.insert(ext)
// v = [w0, w8, ..., w7, w15]
zip1 := m.allocateInstr()
- zip1.asVecPermute(vecOpZip1, v0, v1, v0, vecArrangement16B)
+ zip1.asVecPermute(vecOpZip1, v0.nr(), v1, v0, vecArrangement16B)
m.insert(zip1)
// v.h[0] = w0 + ... + w15
addv := m.allocateInstr()
- addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H)
+ addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H)
m.insert(addv)
// Extract the v.h[0] as the result.
@@ -1006,7 +1006,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// Right arithmetic shift on the original vector and store the result into v1. So we have:
// v[i] = 0xffff if vi<0, 0 otherwise.
sshr := m.allocateInstr()
- sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(15), vecArrangement8H)
+ sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(15), vecArrangement8H)
m.insert(sshr)
// Load the bit mask into r0.
@@ -1014,26 +1014,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// dup r0 to vector v0.
dup := m.allocateInstr()
- dup.asVecDup(v0, r0, vecArrangement2D)
+ dup.asVecDup(v0.nr(), r0, vecArrangement2D)
m.insert(dup)
lsl := m.allocateInstr()
- lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(4), true)
+ lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(4), true)
m.insert(lsl)
movv := m.allocateInstr()
- movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1))
+ movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1))
m.insert(movv)
// Lane-wise logical AND with the bitmask, meaning that we have
// v[i] = (1 << i) if vi<0, 0 otherwise for i=0..3
   //      = (1 << i) if vi<0, 0 otherwise for i=4..7 (the lower-half mask shifted left by 4)
and := m.allocateInstr()
- and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B)
+ and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B)
m.insert(and)
addv := m.allocateInstr()
- addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H)
+ addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H)
m.insert(addv)
movfv := m.allocateInstr()
@@ -1055,7 +1055,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// Right arithmetic shift on the original vector and store the result into v1. So we have:
// v[i] = 0xffffffff if vi<0, 0 otherwise.
sshr := m.allocateInstr()
- sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(31), vecArrangement4S)
+ sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(31), vecArrangement4S)
m.insert(sshr)
// Load the bit mask into r0.
@@ -1063,26 +1063,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// dup r0 to vector v0.
dup := m.allocateInstr()
- dup.asVecDup(v0, r0, vecArrangement2D)
+ dup.asVecDup(v0.nr(), r0, vecArrangement2D)
m.insert(dup)
lsl := m.allocateInstr()
- lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(2), true)
+ lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(2), true)
m.insert(lsl)
movv := m.allocateInstr()
- movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1))
+ movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1))
m.insert(movv)
// Lane-wise logical AND with the bitmask, meaning that we have
// v[i] = (1 << i) if vi<0, 0 otherwise for i in [0, 1]
   //      = (1 << i) if vi<0, 0 otherwise for i in [2, 3] (the lower-half mask shifted left by 2)
and := m.allocateInstr()
- and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B)
+ and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B)
m.insert(and)
addv := m.allocateInstr()
- addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement4S)
+ addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement4S)
m.insert(addv)
movfv := m.allocateInstr()
@@ -1102,21 +1102,21 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
// Move the higher 64-bit int into r0.
movv1 := m.allocateInstr()
- movv1.asMovFromVec(r0, rm, vecArrangementD, vecIndex(1), false)
+ movv1.asMovFromVec(r0.nr(), rm, vecArrangementD, vecIndex(1), false)
m.insert(movv1)
// Move the sign bit into the least significant bit.
lsr1 := m.allocateInstr()
- lsr1.asALUShift(aluOpLsr, r0, r0, operandShiftImm(63), true)
+ lsr1.asALUShift(aluOpLsr, r0.nr(), r0, operandShiftImm(63), true)
m.insert(lsr1)
lsr2 := m.allocateInstr()
- lsr2.asALUShift(aluOpLsr, rd, rd, operandShiftImm(63), true)
+ lsr2.asALUShift(aluOpLsr, rd, operandNR(rd), operandShiftImm(63), true)
m.insert(lsr2)
// rd = (r0<<1) | rd
lsl := m.allocateInstr()
- lsl.asALU(aluOpAdd, rd, rd, operandSR(r0.nr(), 1, shiftOpLSL), false)
+ lsl.asALU(aluOpAdd, rd, operandNR(rd), operandSR(r0.nr(), 1, shiftOpLSL), false)
m.insert(lsl)
default:
panic("Unsupported " + arr.String())
@@ -1128,7 +1128,7 @@ func (m *machine) lowerVecMisc(op vecOp, instr *ssa.Instruction) {
arr := ssaLaneToArrangement(lane)
ins := m.allocateInstr()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
ins.asVecMisc(op, rd, rn, arr)
m.insert(ins)
}
@@ -1137,22 +1137,22 @@ func (m *machine) lowerVecRRR(op vecOp, x, y, ret ssa.Value, arr vecArrangement)
ins := m.allocateInstr()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(ret))
+ rd := m.compiler.VRegOf(ret)
ins.asVecRRR(op, rd, rn, rm, arr)
m.insert(ins)
}
-func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
+func (m *machine) lowerVIMul(rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
if arr != vecArrangement2D {
mul := m.allocateInstr()
mul.asVecRRR(vecOpMul, rd, rn, rm, arr)
m.insert(mul)
} else {
- tmp1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
- tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
- tmp3 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmp1 := m.compiler.AllocateVReg(ssa.TypeV128)
+ tmp2 := m.compiler.AllocateVReg(ssa.TypeV128)
+ tmp3 := m.compiler.AllocateVReg(ssa.TypeV128)
- tmpRes := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmpRes := m.compiler.AllocateVReg(ssa.TypeV128)
// Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696
rev64 := m.allocateInstr()
@@ -1160,7 +1160,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
m.insert(rev64)
mul := m.allocateInstr()
- mul.asVecRRR(vecOpMul, tmp2, tmp2, rn, vecArrangement4S)
+ mul.asVecRRR(vecOpMul, tmp2, operandNR(tmp2), rn, vecArrangement4S)
m.insert(mul)
xtn1 := m.allocateInstr()
@@ -1168,7 +1168,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
m.insert(xtn1)
addp := m.allocateInstr()
- addp.asVecRRR(vecOpAddp, tmp2, tmp2, tmp2, vecArrangement4S)
+ addp.asVecRRR(vecOpAddp, tmp2, operandNR(tmp2), operandNR(tmp2), vecArrangement4S)
m.insert(addp)
xtn2 := m.allocateInstr()
@@ -1179,15 +1179,15 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
   // In short, in the UMLAL instruction, the result register is also one of the source registers, and
   // the value already in the result register is significant.
shll := m.allocateInstr()
- shll.asVecMisc(vecOpShll, tmpRes, tmp2, vecArrangement2S)
+ shll.asVecMisc(vecOpShll, tmpRes, operandNR(tmp2), vecArrangement2S)
m.insert(shll)
umlal := m.allocateInstr()
- umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, tmp3, tmp1, vecArrangement2S)
+ umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, operandNR(tmp3), operandNR(tmp1), vecArrangement2S)
m.insert(umlal)
mov := m.allocateInstr()
- mov.asFpuMov128(rd.nr(), tmpRes.nr())
+ mov.asFpuMov128(rd, tmpRes)
m.insert(mov)
}
}
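The rev64/mul/addp/xtn/shll/umlal sequence in lowerVIMul above is the usual decomposition of a 64-bit lane product into 32-bit halves: a*b mod 2^64 equals lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32). A scalar Go check of that identity, not the backend's code:

package main

import "fmt"

// mulViaHalves multiplies two 64-bit values using only 32x32 partial products,
// mirroring the shape of the vector sequence above.
func mulViaHalves(a, b uint64) uint64 {
	aLo, aHi := a&0xffffffff, a>>32
	bLo, bHi := b&0xffffffff, b>>32
	cross := aLo*bHi + aHi*bLo // the two cross products, summed
	return aLo*bLo + cross<<32 // widen-and-accumulate, the umlal step
}

func main() {
	a, b := uint64(0x1234567890abcdef), uint64(0xfedcba0987654321)
	fmt.Println(mulViaHalves(a, b) == a*b) // true
}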
@@ -1203,7 +1203,7 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) {
// BSL modifies the destination register, so we need to use a temporary register so that
// the actual definition of the destination register happens *after* the BSL instruction.
// That way, we can force the spill instruction to be inserted after the BSL instruction.
- tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmp := m.compiler.AllocateVReg(ssa.TypeV128)
fcmgt := m.allocateInstr()
if max {
@@ -1220,17 +1220,17 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) {
res := operandNR(m.compiler.VRegOf(instr.Return()))
mov2 := m.allocateInstr()
- mov2.asFpuMov128(res.nr(), tmp.nr())
+ mov2.asFpuMov128(res.nr(), tmp)
m.insert(mov2)
}
-func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) {
+func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn regalloc.VReg, rm operand, _64bit, signed bool) {
div := m.allocateInstr()
if signed {
- div.asALU(aluOpSDiv, rd, rn, rm, _64bit)
+ div.asALU(aluOpSDiv, rd, operandNR(rn), rm, _64bit)
} else {
- div.asALU(aluOpUDiv, rd, rn, rm, _64bit)
+ div.asALU(aluOpUDiv, rd, operandNR(rn), rm, _64bit)
}
m.insert(div)
@@ -1239,11 +1239,11 @@ func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi
// rd = rn-rd*rm by MSUB instruction.
msub := m.allocateInstr()
- msub.asALURRRR(aluOpMSub, rd, rd, rm, rn, _64bit)
+ msub.asALURRRR(aluOpMSub, rd, operandNR(rd), rm, rn, _64bit)
m.insert(msub)
}
-func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) {
+func (m *machine) lowerIDiv(execCtxVReg, rd regalloc.VReg, rn, rm operand, _64bit, signed bool) {
div := m.allocateInstr()
if signed {
@@ -1260,7 +1260,7 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi
   // We need to check for signed overflow, which happens iff the operation is "math.MinInt{32,64} / -1".
minusOneCheck := m.allocateInstr()
// Sets eq condition if rm == -1.
- minusOneCheck.asALU(aluOpAddS, operandNR(xzrVReg), rm, operandImm12(1, 0), _64bit)
+ minusOneCheck.asALU(aluOpAddS, xzrVReg, rm, operandImm12(1, 0), _64bit)
m.insert(minusOneCheck)
ccmp := m.allocateInstr()
@@ -1290,20 +1290,20 @@ func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, c
func (m *machine) lowerFcopysign(x, y, ret ssa.Value) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- var tmpI, tmpF operand
+ var tmpI, tmpF regalloc.VReg
_64 := x.Type() == ssa.TypeF64
if _64 {
- tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
- tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
+ tmpF = m.compiler.AllocateVReg(ssa.TypeF64)
+ tmpI = m.compiler.AllocateVReg(ssa.TypeI64)
} else {
- tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF32))
- tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
+ tmpF = m.compiler.AllocateVReg(ssa.TypeF32)
+ tmpI = m.compiler.AllocateVReg(ssa.TypeI32)
}
rd := m.compiler.VRegOf(ret)
- m.lowerFcopysignImpl(operandNR(rd), rn, rm, tmpI, tmpF, _64)
+ m.lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF, _64)
}
-func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool) {
+func (m *machine) lowerFcopysignImpl(rd regalloc.VReg, rn, rm operand, tmpI, tmpF regalloc.VReg, _64bit bool) {
// This is exactly the same code emitted by GCC for "__builtin_copysign":
//
// mov x0, -9223372036854775808
@@ -1313,26 +1313,26 @@ func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool
setMSB := m.allocateInstr()
if _64bit {
- m.lowerConstantI64(tmpI.nr(), math.MinInt64)
- setMSB.asMovToVec(tmpF, tmpI, vecArrangementD, vecIndex(0))
+ m.lowerConstantI64(tmpI, math.MinInt64)
+ setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementD, vecIndex(0))
} else {
- m.lowerConstantI32(tmpI.nr(), math.MinInt32)
- setMSB.asMovToVec(tmpF, tmpI, vecArrangementS, vecIndex(0))
+ m.lowerConstantI32(tmpI, math.MinInt32)
+ setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementS, vecIndex(0))
}
m.insert(setMSB)
- tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
+ tmpReg := m.compiler.AllocateVReg(ssa.TypeF64)
mov := m.allocateInstr()
- mov.asFpuMov64(tmpReg.nr(), rn.nr())
+ mov.asFpuMov64(tmpReg, rn.nr())
m.insert(mov)
vbit := m.allocateInstr()
- vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, tmpF, vecArrangement8B)
+ vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, operandNR(tmpF), vecArrangement8B)
m.insert(vbit)
movDst := m.allocateInstr()
- movDst.asFpuMov64(rd.nr(), tmpReg.nr())
+ movDst.asFpuMov64(rd, tmpReg)
m.insert(movDst)
}
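The sequence in lowerFcopysignImpl above (materialize a mask with only the MSB set, then bit-insert from rm into a copy of rn) is the standard bit-level formulation of copysign. A scalar illustration in plain Go, not the backend's code:

package main

import (
	"fmt"
	"math"
)

// copysignViaBits returns x with the sign of y: every bit of the result comes
// from x except the most significant one, which is taken from y.
func copysignViaBits(x, y float64) float64 {
	const signMask = 1 << 63
	xb, yb := math.Float64bits(x), math.Float64bits(y)
	return math.Float64frombits(xb&^signMask | yb&signMask)
}

func main() {
	fmt.Println(copysignViaBits(3.5, -2.0))  // -3.5
	fmt.Println(copysignViaBits(-1.25, 2.0)) // 1.25
}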
@@ -1340,7 +1340,7 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) {
v, dstType := instr.BitcastData()
srcType := v.Type()
rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone)
- rd := operandNR(m.compiler.VRegOf(instr.Return()))
+ rd := m.compiler.VRegOf(instr.Return())
srcInt := srcType.IsInt()
dstInt := dstType.IsInt()
switch {
@@ -1371,14 +1371,14 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) {
func (m *machine) lowerFpuUniOp(op fpuUniOp, in, out ssa.Value) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(in), extModeNone)
- rd := operandNR(m.compiler.VRegOf(out))
+ rd := m.compiler.VRegOf(out)
neg := m.allocateInstr()
neg.asFpuRR(op, rd, rn, in.Type().Bits() == 64)
m.insert(neg)
}
-func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) {
+func (m *machine) lowerFpuToInt(rd regalloc.VReg, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) {
if !nonTrapping {
// First of all, we have to clear the FPU flags.
flagClear := m.allocateInstr()
@@ -1405,7 +1405,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
// Check if the conversion was undefined by comparing the status with 1.
// See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register
alu := m.allocateInstr()
- alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true)
+ alu.asALU(aluOpSubS, xzrVReg, operandNR(tmpReg), operandImm12(1, 0), true)
m.insert(alu)
// If it is not undefined, we can return the result.
@@ -1429,7 +1429,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
}
}
-func (m *machine) lowerIntToFpu(rd, rn operand, signed, src64bit, dst64bit bool) {
+func (m *machine) lowerIntToFpu(rd regalloc.VReg, rn operand, signed, src64bit, dst64bit bool) {
cvt := m.allocateInstr()
cvt.asIntToFpu(rd, rn, signed, src64bit, dst64bit)
m.insert(cvt)
@@ -1456,7 +1456,7 @@ func (m *machine) lowerFpuBinOp(si *ssa.Instruction) {
xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)
rn := m.getOperand_NR(xDef, extModeNone)
rm := m.getOperand_NR(yDef, extModeNone)
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
instr.asFpuRRR(op, rd, rn, rm, x.Type().Bits() == 64)
m.insert(instr)
}
@@ -1482,7 +1482,7 @@ func (m *machine) lowerSubOrAdd(si *ssa.Instruction, add bool) {
case !add && yNegated: // rn+rm = x-(-y) = x-y
aop = aluOpAdd
}
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
alu := m.allocateInstr()
alu.asALU(aop, rd, rn, rm, x.Type().Bits() == 64)
m.insert(alu)
@@ -1527,7 +1527,7 @@ func (m *machine) lowerIcmp(si *ssa.Instruction) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext)
rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), ext)
alu := m.allocateInstr()
- alu.asALU(aluOpSubS, operandNR(xzrVReg), rn, rm, in64bit)
+ alu.asALU(aluOpSubS, xzrVReg, rn, rm, in64bit)
m.insert(alu)
cset := m.allocateInstr()
@@ -1542,7 +1542,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
switch flag {
case eq:
@@ -1554,7 +1554,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) {
cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr)
m.insert(cmp)
not := m.allocateInstr()
- not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B)
+ not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B)
m.insert(not)
case ge:
cmp := m.allocateInstr()
@@ -1598,7 +1598,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
switch flag {
case eq:
@@ -1610,7 +1610,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {
cmp.asVecRRR(vecOpFcmeq, rd, rn, rm, arr)
m.insert(cmp)
not := m.allocateInstr()
- not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B)
+ not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B)
m.insert(not)
case ge:
cmp := m.allocateInstr()
@@ -1631,7 +1631,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {
}
}
-func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool) {
+func (m *machine) lowerVfpuToInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) {
cvt := m.allocateInstr()
if signed {
cvt.asVecMisc(vecOpFcvtzs, rd, rn, arr)
@@ -1643,15 +1643,15 @@ func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool
if arr == vecArrangement2D {
narrow := m.allocateInstr()
if signed {
- narrow.asVecMisc(vecOpSqxtn, rd, rd, vecArrangement2S)
+ narrow.asVecMisc(vecOpSqxtn, rd, operandNR(rd), vecArrangement2S)
} else {
- narrow.asVecMisc(vecOpUqxtn, rd, rd, vecArrangement2S)
+ narrow.asVecMisc(vecOpUqxtn, rd, operandNR(rd), vecArrangement2S)
}
m.insert(narrow)
}
}
-func (m *machine) lowerVfpuFromInt(rd, rn operand, arr vecArrangement, signed bool) {
+func (m *machine) lowerVfpuFromInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) {
cvt := m.allocateInstr()
if signed {
cvt.asVecMisc(vecOpScvtf, rd, rn, arr)
@@ -1665,7 +1665,7 @@ func (m *machine) lowerShifts(si *ssa.Instruction, ext extMode, aluOp aluOp) {
x, amount := si.Arg2()
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext)
rm := m.getOperand_ShiftImm_NR(m.compiler.ValueDefinition(amount), ext, x.Type().Bits())
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
alu := m.allocateInstr()
alu.asALUShift(aluOp, rd, rn, rm, x.Type().Bits() == 64)
@@ -1678,11 +1678,11 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult
xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)
rn := m.getOperand_NR(xDef, extModeNone)
- var rd operand
+ var rd regalloc.VReg
if ignoreResult {
- rd = operandNR(xzrVReg)
+ rd = xzrVReg
} else {
- rd = operandNR(m.compiler.VRegOf(si.Return()))
+ rd = m.compiler.VRegOf(si.Return())
}
_64 := x.Type().Bits() == 64
@@ -1691,7 +1691,7 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult
c := instr.ConstantVal()
if isBitMaskImmediate(c, _64) {
// Constant bit wise operations can be lowered to a single instruction.
- alu.asALUBitmaskImm(op, rd.nr(), rn.nr(), c, _64)
+ alu.asALUBitmaskImm(op, rd, rn.nr(), c, _64)
m.insert(alu)
return
}
@@ -1709,25 +1709,25 @@ func (m *machine) lowerRotl(si *ssa.Instruction) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- var tmp operand
+ var tmp regalloc.VReg
if _64 {
- tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
+ tmp = m.compiler.AllocateVReg(ssa.TypeI64)
} else {
- tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
+ tmp = m.compiler.AllocateVReg(ssa.TypeI32)
}
- rd := operandNR(m.compiler.VRegOf(r))
+ rd := m.compiler.VRegOf(r)
// Encode rotl as neg + rotr: neg is a sub against the zero-reg.
m.lowerRotlImpl(rd, rn, rm, tmp, _64)
}
-func (m *machine) lowerRotlImpl(rd, rn, rm, tmp operand, is64bit bool) {
+func (m *machine) lowerRotlImpl(rd regalloc.VReg, rn, rm operand, tmp regalloc.VReg, is64bit bool) {
// Encode rotl as neg + rotr: neg is a sub against the zero-reg.
neg := m.allocateInstr()
neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rm, is64bit)
m.insert(neg)
alu := m.allocateInstr()
- alu.asALU(aluOpRotR, rd, rn, tmp, is64bit)
+ alu.asALU(aluOpRotR, rd, rn, operandNR(tmp), is64bit)
m.insert(alu)
}
@@ -1737,7 +1737,7 @@ func (m *machine) lowerRotr(si *ssa.Instruction) {
xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)
rn := m.getOperand_NR(xDef, extModeNone)
rm := m.getOperand_NR(yDef, extModeNone)
- rd := operandNR(m.compiler.VRegOf(si.Return()))
+ rd := m.compiler.VRegOf(si.Return())
alu := m.allocateInstr()
alu.asALU(aluOpRotR, rd, rn, rm, si.Return().Type().Bits() == 64)
@@ -1797,7 +1797,7 @@ func (m *machine) lowerImul(x, y, result ssa.Value) {
// TODO: if this comes before Add/Sub, we could merge it by putting it into the place of xzrVReg.
mul := m.allocateInstr()
- mul.asALURRRR(aluOpMAdd, operandNR(rd), rn, rm, operandNR(xzrVReg), x.Type().Bits() == 64)
+ mul.asALURRRR(aluOpMAdd, rd, rn, rm, xzrVReg, x.Type().Bits() == 64)
m.insert(mul)
}
@@ -1849,22 +1849,22 @@ func (m *machine) lowerPopcnt(x, result ssa.Value) {
// mov x5, v0.d[0] ;; finally we mov the result back to a GPR
//
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rf1 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
ins := m.allocateInstr()
- ins.asMovToVec(rf1, rn, vecArrangementD, vecIndex(0))
+ ins.asMovToVec(rf1.nr(), rn, vecArrangementD, vecIndex(0))
m.insert(ins)
rf2 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
cnt := m.allocateInstr()
- cnt.asVecMisc(vecOpCnt, rf2, rf1, vecArrangement16B)
+ cnt.asVecMisc(vecOpCnt, rf2.nr(), rf1, vecArrangement16B)
m.insert(cnt)
rf3 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
uaddlv := m.allocateInstr()
- uaddlv.asVecLanes(vecOpUaddlv, rf3, rf2, vecArrangement8B)
+ uaddlv.asVecLanes(vecOpUaddlv, rf3.nr(), rf2, vecArrangement8B)
m.insert(uaddlv)
mov := m.allocateInstr()
@@ -1879,32 +1879,35 @@ func (m *machine) lowerExitWithCode(execCtxVReg regalloc.VReg, code wazevoapi.Ex
loadExitCodeConst.asMOVZ(tmpReg1, uint64(code), 0, true)
setExitCode := m.allocateInstr()
- setExitCode.asStore(operandNR(tmpReg1),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
- }, 32)
+ mode := m.amodePool.Allocate()
+ *mode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
+ }
+ setExitCode.asStore(operandNR(tmpReg1), mode, 32)
// In order to unwind the stack, we also need to push the current stack pointer:
tmp2 := m.compiler.AllocateVReg(ssa.TypeI64)
movSpToTmp := m.allocateInstr()
movSpToTmp.asMove64(tmp2, spVReg)
strSpToExecCtx := m.allocateInstr()
- strSpToExecCtx.asStore(operandNR(tmp2),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
- }, 64)
+ mode2 := m.amodePool.Allocate()
+ *mode2 = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
+ }
+ strSpToExecCtx.asStore(operandNR(tmp2), mode2, 64)
// Also the address of this exit.
tmp3 := m.compiler.AllocateVReg(ssa.TypeI64)
currentAddrToTmp := m.allocateInstr()
currentAddrToTmp.asAdr(tmp3, 0)
storeCurrentAddrToExecCtx := m.allocateInstr()
- storeCurrentAddrToExecCtx.asStore(operandNR(tmp3),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
- }, 64)
+ mode3 := m.amodePool.Allocate()
+ *mode3 = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ rn: execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
+ }
+ storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), mode3, 64)
exitSeq := m.allocateInstr()
exitSeq.asExitSequence(execCtxVReg)
@@ -1937,7 +1940,7 @@ func (m *machine) lowerIcmpToFlag(x, y ssa.Value, signed bool) {
alu.asALU(
aluOpSubS,
// We don't need the result, just need to set flags.
- operandNR(xzrVReg),
+ xzrVReg,
rn,
rm,
x.Type().Bits() == 64,
@@ -2012,7 +2015,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
alu.asALU(
aluOpSubS,
// We don't need the result, just need to set flags.
- operandNR(xzrVReg),
+ xzrVReg,
rn,
operandNR(xzrVReg),
c.Type().Bits() == 64,
@@ -2024,7 +2027,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
- rd := operandNR(m.compiler.VRegOf(result))
+ rd := m.compiler.VRegOf(result)
switch x.Type() {
case ssa.TypeI32, ssa.TypeI64:
// csel rd, rn, rm, cc
@@ -2041,10 +2044,10 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
}
}
-func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
+func (m *machine) lowerSelectVec(rc, rn, rm operand, rd regalloc.VReg) {
// First check if `rc` is zero or not.
checkZero := m.allocateInstr()
- checkZero.asALU(aluOpSubS, operandNR(xzrVReg), rc, operandNR(xzrVReg), false)
+ checkZero.asALU(aluOpSubS, xzrVReg, rc, operandNR(xzrVReg), false)
m.insert(checkZero)
// Then use CSETM to set all bits to one if `rc` is zero.
@@ -2054,7 +2057,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
m.insert(cset)
// Then move the bits to the result vector register.
- tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+ tmp2 := m.compiler.AllocateVReg(ssa.TypeV128)
dup := m.allocateInstr()
dup.asVecDup(tmp2, operandNR(allOnesOrZero), vecArrangement2D)
m.insert(dup)
@@ -2067,7 +2070,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
// Finally, move the result to the destination register.
mov2 := m.allocateInstr()
- mov2.asFpuMov128(rd.nr(), tmp2.nr())
+ mov2.asFpuMov128(rd, tmp2)
m.insert(mov2)
}
@@ -2099,28 +2102,28 @@ func (m *machine) lowerAtomicRmw(si *ssa.Instruction) {
addr, val := si.Arg2()
addrDef, valDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(val)
rn := m.getOperand_NR(addrDef, extModeNone)
- rt := operandNR(m.compiler.VRegOf(si.Return()))
+ rt := m.compiler.VRegOf(si.Return())
rs := m.getOperand_NR(valDef, extModeNone)
_64 := si.Return().Type().Bits() == 64
- var tmp operand
+ var tmp regalloc.VReg
if _64 {
- tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
+ tmp = m.compiler.AllocateVReg(ssa.TypeI64)
} else {
- tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
+ tmp = m.compiler.AllocateVReg(ssa.TypeI32)
}
- m.lowerAtomicRmwImpl(op, rn, rs, rt, tmp, size, negateArg, flipArg, _64)
+ m.lowerAtomicRmwImpl(op, rn.nr(), rs.nr(), rt, tmp, size, negateArg, flipArg, _64)
}
-func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp operand, size uint64, negateArg, flipArg, dst64bit bool) {
+func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp regalloc.VReg, size uint64, negateArg, flipArg, dst64bit bool) {
switch {
case negateArg:
neg := m.allocateInstr()
- neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rs, dst64bit)
+ neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit)
m.insert(neg)
case flipArg:
flip := m.allocateInstr()
- flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), rs, dst64bit)
+ flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit)
m.insert(flip)
default:
tmp = rs
@@ -2139,32 +2142,32 @@ func (m *machine) lowerAtomicCas(si *ssa.Instruction) {
rn := m.getOperand_NR(addrDef, extModeNone)
rt := m.getOperand_NR(replDef, extModeNone)
rs := m.getOperand_NR(expDef, extModeNone)
- tmp := operandNR(m.compiler.AllocateVReg(si.Return().Type()))
+ tmp := m.compiler.AllocateVReg(si.Return().Type())
_64 := si.Return().Type().Bits() == 64
// rs is overwritten by CAS, so we need to move it to the result register before the instruction
   // in case it is used somewhere else.
mov := m.allocateInstr()
if _64 {
- mov.asMove64(tmp.nr(), rs.nr())
+ mov.asMove64(tmp, rs.nr())
} else {
- mov.asMove32(tmp.nr(), rs.nr())
+ mov.asMove32(tmp, rs.nr())
}
m.insert(mov)
- m.lowerAtomicCasImpl(rn, tmp, rt, size)
+ m.lowerAtomicCasImpl(rn.nr(), tmp, rt.nr(), size)
mov2 := m.allocateInstr()
rd := m.compiler.VRegOf(si.Return())
if _64 {
- mov2.asMove64(rd, tmp.nr())
+ mov2.asMove64(rd, tmp)
} else {
- mov2.asMove32(rd, tmp.nr())
+ mov2.asMove32(rd, tmp)
}
m.insert(mov2)
}
-func (m *machine) lowerAtomicCasImpl(rn, rs, rt operand, size uint64) {
+func (m *machine) lowerAtomicCasImpl(rn, rs, rt regalloc.VReg, size uint64) {
cas := m.allocateInstr()
cas.asAtomicCas(rn, rs, rt, size)
m.insert(cas)
@@ -2176,12 +2179,12 @@ func (m *machine) lowerAtomicLoad(si *ssa.Instruction) {
addrDef := m.compiler.ValueDefinition(addr)
rn := m.getOperand_NR(addrDef, extModeNone)
- rt := operandNR(m.compiler.VRegOf(si.Return()))
+ rt := m.compiler.VRegOf(si.Return())
- m.lowerAtomicLoadImpl(rn, rt, size)
+ m.lowerAtomicLoadImpl(rn.nr(), rt, size)
}
-func (m *machine) lowerAtomicLoadImpl(rn, rt operand, size uint64) {
+func (m *machine) lowerAtomicLoadImpl(rn, rt regalloc.VReg, size uint64) {
ld := m.allocateInstr()
ld.asAtomicLoad(rn, rt, size)
m.insert(ld)
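Most hunks in this file follow one mechanical pattern: destination registers that previously traveled as operand values (wrapped by operandNR and unwrapped with .nr()) are now passed as bare regalloc.VReg values, and only true source operands keep the wrapper. A rough, self-contained sketch of that shape, using stand-in types rather than the backend's real operand and VReg definitions:

package main

import "fmt"

type vreg uint32

// operand pairs a register with an addressing/extension kind; destinations
// never need the kind, so wrapping them only added conversion noise.
type operand struct {
	reg  vreg
	kind byte
}

func operandNR(r vreg) operand { return operand{reg: r} }
func (o operand) nr() vreg     { return o.reg }

// Before: the destination travels as an operand and is unwrapped at each use.
func emitOld(rd, rn, rm operand) { fmt.Println("add", rd.nr(), rn.nr(), rm.nr()) }

// After: the destination is a bare virtual register.
func emitNew(rd vreg, rn, rm operand) { fmt.Println("add", rd, rn.nr(), rm.nr()) }

func main() {
	rd, rn, rm := vreg(1), operandNR(2), operandNR(3)
	emitOld(operandNR(rd), rn, rm)
	emitNew(rd, rn, rm)
}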
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go
index 4842eaa38..fd0760d72 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go
@@ -24,6 +24,14 @@ type (
addressModeKind byte
)
+func resetAddressMode(a *addressMode) {
+ a.kind = 0
+ a.rn = 0
+ a.rm = 0
+ a.extOp = 0
+ a.imm = 0
+}
+
const (
// addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended,
// and then scaled by bits(type)/8.
@@ -140,15 +148,17 @@ func (a addressMode) format(dstSizeBits byte) (ret string) {
return
}
-func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode {
+func addressModePreOrPostIndex(m *machine, rn regalloc.VReg, imm int64, preIndex bool) *addressMode {
if !offsetFitsInAddressModeKindRegSignedImm9(imm) {
panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm))
}
+ mode := m.amodePool.Allocate()
if preIndex {
- return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
+ *mode = addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
} else {
- return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
+ *mode = addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
}
+ return mode
}
func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool {
@@ -207,9 +217,9 @@ func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret
amode := m.lowerToAddressMode(ptr, offset, size)
load := m.allocateInstr()
if signed {
- load.asSLoad(operandNR(ret), amode, size)
+ load.asSLoad(ret, amode, size)
} else {
- load.asULoad(operandNR(ret), amode, size)
+ load.asULoad(ret, amode, size)
}
m.insert(load)
}
@@ -221,11 +231,11 @@ func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.
load := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
- load.asULoad(operandNR(dst), amode, typ.Bits())
+ load.asULoad(dst, amode, typ.Bits())
case ssa.TypeF32, ssa.TypeF64:
- load.asFpuLoad(operandNR(dst), amode, typ.Bits())
+ load.asFpuLoad(dst, amode, typ.Bits())
case ssa.TypeV128:
- load.asFpuLoad(operandNR(dst), amode, 128)
+ load.asFpuLoad(dst, amode, 128)
default:
panic("TODO")
}
@@ -239,7 +249,7 @@ func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane,
m.lowerConstantI64(offsetReg, int64(offset))
addedBase := m.addReg64ToReg64(base, offsetReg)
- rd := operandNR(m.compiler.VRegOf(ret))
+ rd := m.compiler.VRegOf(ret)
ld1r := m.allocateInstr()
ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane))
@@ -258,7 +268,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) {
}
// lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
-func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) {
+func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode *addressMode) {
// TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and
// addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed
// to support more efficient address resolution.
@@ -272,32 +282,33 @@ func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte
// During the construction, this might emit additional instructions.
//
// Extracted as a separate function for easy testing.
-func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) {
+func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode *addressMode) {
+ amode = m.amodePool.Allocate()
switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); {
case a64sExist && a32sExist:
var base regalloc.VReg
base = a64s.Dequeue()
var a32 addend32
a32 = a32s.Dequeue()
- amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
+ *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):
var base regalloc.VReg
base = a64s.Dequeue()
- amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
offset = 0
case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):
var base regalloc.VReg
base = a64s.Dequeue()
- amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
+ *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
offset = 0
case a64sExist:
var base regalloc.VReg
base = a64s.Dequeue()
if !a64s.Empty() {
index := a64s.Dequeue()
- amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
+ *amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
} else {
- amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
}
case a32sExist:
base32 := a32s.Dequeue()
@@ -314,14 +325,14 @@ func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32],
if !a32s.Empty() {
index := a32s.Dequeue()
- amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
+ *amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
} else {
- amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
}
default: // Only static offsets.
tmpReg := m.compiler.AllocateVReg(ssa.TypeI64)
m.lowerConstantI64(tmpReg, offset)
- amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
+ *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
offset = 0
}
@@ -411,13 +422,13 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
if imm12Op, ok := asImm12Operand(uint64(c)); ok {
- alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true)
+ alu.asALU(aluOpAdd, rd, operandNR(r), imm12Op, true)
} else if imm12Op, ok = asImm12Operand(uint64(-c)); ok {
- alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true)
+ alu.asALU(aluOpSub, rd, operandNR(r), imm12Op, true)
} else {
tmp := m.compiler.AllocateVReg(ssa.TypeI64)
m.load64bitConst(c, tmp)
- alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true)
+ alu.asALU(aluOpAdd, rd, operandNR(r), operandNR(tmp), true)
}
m.insert(alu)
return
@@ -426,7 +437,7 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
- alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true)
+ alu.asALU(aluOpAdd, rd, operandNR(rn), operandNR(rm), true)
m.insert(alu)
return
}
@@ -434,7 +445,7 @@ func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) {
rd = m.compiler.AllocateVReg(ssa.TypeI64)
alu := m.allocateInstr()
- alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true)
+ alu.asALU(aluOpAdd, rd, operandNR(rn), operandER(rm, ext, 64), true)
m.insert(alu)
return
}
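The m.amodePool.Allocate() calls above are backed by wazevoapi.NewPool[addressMode](resetAddressMode) and cleared in machine.Reset in machine.go below: a pool-with-reset-callback idiom for reusing addressMode values instead of heap-allocating one per use. A minimal, self-contained sketch of that idiom; the pool type here is illustrative, not the wazevoapi API:

package main

import "fmt"

type addrMode struct {
	kind, rn, rm byte
	imm          int64
}

// pool hands out *T values and recycles them wholesale: resetPool makes every
// previously allocated item reusable, and the reset callback zeroes an item
// each time it is handed out.
type pool[T any] struct {
	items []*T
	next  int
	reset func(*T)
}

func newPool[T any](reset func(*T)) pool[T] { return pool[T]{reset: reset} }

func (p *pool[T]) allocate() *T {
	if p.next == len(p.items) {
		p.items = append(p.items, new(T))
	}
	v := p.items[p.next]
	p.next++
	p.reset(v)
	return v
}

func (p *pool[T]) resetPool() { p.next = 0 }

func main() {
	p := newPool(func(a *addrMode) { *a = addrMode{} })
	m := p.allocate()
	*m = addrMode{kind: 1, imm: 16}
	fmt.Println(*m) // {1 0 0 16}
	p.resetPool()
	fmt.Println(*p.allocate()) // the same slot, zeroed again: {0 0 0 0}
}

Handing out pointers matters because instructions now store a *addressMode; the sketch keeps each item individually allocated so recycled pointers stay valid, which the real wazevoapi pool is assumed to handle internally.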
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go
index b435d9ba9..5f584f928 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go
@@ -21,6 +21,8 @@ type (
regAlloc regalloc.Allocator
regAllocFn *backend.RegAllocFunction[*instruction, *machine]
+ amodePool wazevoapi.Pool[addressMode]
+
// addendsWorkQueue is used during address lowering, defined here for reuse.
addendsWorkQueue wazevoapi.Queue[ssa.Value]
addends32 wazevoapi.Queue[addend32]
@@ -105,6 +107,7 @@ func NewBackend() backend.Machine {
spillSlots: make(map[regalloc.VRegID]int64),
executableContext: newExecutableContext(),
regAlloc: regalloc.NewAllocator(regInfo),
+ amodePool: wazevoapi.NewPool[addressMode](resetAddressMode),
}
return m
}
@@ -149,6 +152,7 @@ func (m *machine) Reset() {
m.maxRequiredStackSizeForCalls = 0
m.executableContext.Reset()
m.jmpTableTargets = m.jmpTableTargets[:0]
+ m.amodePool.Reset()
}
// SetCurrentABI implements backend.Machine SetCurrentABI.
@@ -183,9 +187,8 @@ func (m *machine) allocateBrTarget() (nop *instruction, l label) {
l = ectx.AllocateLabel()
nop = m.allocateInstr()
nop.asNop0WithLabel(l)
- pos := ectx.AllocateLabelPosition(l)
+ pos := ectx.GetOrAllocateLabelPosition(l)
pos.Begin, pos.End = nop, nop
- ectx.LabelPositions[l] = pos
return
}
@@ -209,7 +212,7 @@ func (m *machine) allocateNop() *instruction {
}
func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) {
- amode := &i.amode
+ amode := i.getAmode()
switch amode.kind {
case addressModeKindResultStackSpace:
amode.imm += ret0offset
@@ -281,7 +284,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {
switch cur.kind {
case nop0:
l := cur.nop0Label()
- if pos, ok := ectx.LabelPositions[l]; ok {
+ if pos := ectx.LabelPositions[l]; pos != nil {
pos.BinaryOffset = offset + size
}
case condBr:
@@ -428,8 +431,10 @@ func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *
func (m *machine) Format() string {
ectx := m.executableContext
begins := map[*instruction]label{}
- for l, pos := range ectx.LabelPositions {
- begins[pos.Begin] = l
+ for _, pos := range ectx.LabelPositions {
+ if pos != nil {
+ begins[pos.Begin] = pos.L
+ }
}
irBlocks := map[label]ssa.BasicBlockID{}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
index 466fac464..d9032f921 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
@@ -70,7 +70,7 @@ func (m *machine) setupPrologue() {
// +-----------------+ <----- SP
// (low address)
//
- _amode := addressModePreOrPostIndex(spVReg,
+ _amode := addressModePreOrPostIndex(m, spVReg,
-16, // stack pointer must be 16-byte aligned.
true, // Decrement before store.
)
@@ -159,7 +159,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc
sizeOfArgRetReg = tmpRegVReg
subSp := m.allocateInstr()
- subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
+ subSp.asALU(aluOpSub, spVReg, operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
cur = linkInstr(cur, subSp)
} else {
sizeOfArgRetReg = xzrVReg
@@ -168,7 +168,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc
// Saves the return address (lr) and the size_of_arg_ret below the SP.
// size_of_arg_ret is used for stack unwinding.
pstr := m.allocateInstr()
- amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */)
+ amode := addressModePreOrPostIndex(m, spVReg, -16, true /* decrement before store */)
pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode)
cur = linkInstr(cur, pstr)
return cur
@@ -182,7 +182,7 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {
} else {
frameSizeReg = xzrVReg
}
- _amode := addressModePreOrPostIndex(spVReg,
+ _amode := addressModePreOrPostIndex(m, spVReg,
-16, // stack pointer must be 16-byte aligned.
true, // Decrement before store.
)
@@ -213,7 +213,7 @@ func (m *machine) postRegAlloc() {
m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
default:
// Removes the redundant copy instruction.
- if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() {
+ if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() {
prev, next := cur.prev, cur.next
// Remove the copy instruction.
prev.next = next
@@ -286,16 +286,16 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
for i := range m.clobberedRegs {
vr := m.clobberedRegs[l-i] // reverse order to restore.
load := m.allocateInstr()
- amode := addressModePreOrPostIndex(spVReg,
+ amode := addressModePreOrPostIndex(m, spVReg,
16, // stack pointer must be 16-byte aligned.
   false, // Increment after load.
)
// TODO: pair loads to reduce the number of instructions.
switch regTypeToRegisterSizeInBits(vr.RegType()) {
case 64: // save int reg.
- load.asULoad(operandNR(vr), amode, 64)
+ load.asULoad(vr, amode, 64)
case 128: // save vector reg.
- load.asFpuLoad(operandNR(vr), amode, 128)
+ load.asFpuLoad(vr, amode, 128)
}
cur = linkInstr(cur, load)
}
@@ -317,8 +317,8 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
// SP----> +-----------------+
ldr := m.allocateInstr()
- ldr.asULoad(operandNR(lrVReg),
- addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
+ ldr.asULoad(lrVReg,
+ addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
cur = linkInstr(cur, ldr)
if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
@@ -351,14 +351,14 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi
if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok {
// sub tmp, sp, #requiredStackSize
sub := m.allocateInstr()
- sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true)
+ sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), immm12op, true)
cur = linkInstr(cur, sub)
} else {
   // In this case, we first load the requiredStackSize into the temporary register,
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
// Then subtract it.
sub := m.allocateInstr()
- sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true)
+ sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), operandNR(tmpRegVReg), true)
cur = linkInstr(cur, sub)
}
@@ -366,16 +366,18 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi
// ldr tmp2, [executionContext #StackBottomPtr]
ldr := m.allocateInstr()
- ldr.asULoad(operandNR(tmp2), addressMode{
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: x0VReg, // execution context is always the first argument.
imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(),
- }, 64)
+ }
+ ldr.asULoad(tmp2, amode, 64)
cur = linkInstr(cur, ldr)
// subs xzr, tmp, tmp2
subs := m.allocateInstr()
- subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true)
+ subs.asALU(aluOpSubS, xzrVReg, operandNR(tmpRegVReg), operandNR(tmp2), true)
cur = linkInstr(cur, subs)
// b.ge #imm
@@ -388,22 +390,25 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi
// First load the requiredStackSize into the temporary register,
cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
setRequiredStackSize := m.allocateInstr()
- setRequiredStackSize.asStore(operandNR(tmpRegVReg),
- addressMode{
- kind: addressModeKindRegUnsignedImm12,
- // Execution context is always the first argument.
- rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
- }, 64)
+ amode := m.amodePool.Allocate()
+ *amode = addressMode{
+ kind: addressModeKindRegUnsignedImm12,
+ // Execution context is always the first argument.
+ rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
+ }
+ setRequiredStackSize.asStore(operandNR(tmpRegVReg), amode, 64)
cur = linkInstr(cur, setRequiredStackSize)
}
ldrAddress := m.allocateInstr()
- ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{
+ amode2 := m.amodePool.Allocate()
+ *amode2 = addressMode{
kind: addressModeKindRegUnsignedImm12,
rn: x0VReg, // execution context is always the first argument
imm: wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(),
- }, 64)
+ }
+ ldrAddress.asULoad(tmpRegVReg, amode2, 64)
cur = linkInstr(cur, ldrAddress)
// Then jumps to the stack grow call sequence's address, meaning
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
index 1c8793b73..c7eb92cc2 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
@@ -91,7 +91,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft
}
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
- var amode addressMode
+ var amode *addressMode
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
store := m.allocateInstr()
store.asStore(operandNR(v), amode, typ.Bits())
@@ -116,16 +116,16 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af
}
offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
- var amode addressMode
+ var amode *addressMode
cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
load := m.allocateInstr()
switch typ {
case ssa.TypeI32, ssa.TypeI64:
- load.asULoad(operandNR(v), amode, typ.Bits())
+ load.asULoad(v, amode, typ.Bits())
case ssa.TypeF32, ssa.TypeF64:
- load.asFpuLoad(operandNR(v), amode, typ.Bits())
+ load.asFpuLoad(v, amode, typ.Bits())
case ssa.TypeV128:
- load.asFpuLoad(operandNR(v), amode, 128)
+ load.asFpuLoad(v, amode, 128)
default:
panic("TODO")
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
index 3f36c84e5..655370786 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
@@ -35,7 +35,7 @@ type (
iter int
reversePostOrderBlocks []RegAllocBlock[I, m]
// labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks.
- labelToRegAllocBlockIndex map[Label]int
+ labelToRegAllocBlockIndex [] /* Label to */ int
loopNestingForestRoots []ssa.BasicBlock
}
@@ -56,10 +56,9 @@ type (
// NewRegAllocFunction returns a new RegAllocFunction.
func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] {
return &RegAllocFunction[I, M]{
- m: m,
- ssb: ssb,
- c: c,
- labelToRegAllocBlockIndex: make(map[Label]int),
+ m: m,
+ ssb: ssb,
+ c: c,
}
}
@@ -74,6 +73,9 @@ func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end
end: end,
id: int(sb.ID()),
})
+ if len(f.labelToRegAllocBlockIndex) <= int(l) {
+ f.labelToRegAllocBlockIndex = append(f.labelToRegAllocBlockIndex, make([]int, int(l)-len(f.labelToRegAllocBlockIndex)+1)...)
+ }
f.labelToRegAllocBlockIndex[l] = i
}
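The hunk above swaps the map[Label]int for a slice indexed directly by the label, growing it on demand. A small sketch of that grow-then-index pattern (Label here is only a stand-in for the backend's dense label type):

```go
package main

import "fmt"

// Label is a stand-in for the backend's dense block label type.
type Label int

// setBlockIndex grows the slice just enough to make the label addressable,
// replacing a map[Label]int with a plain slice indexed by the label.
func setBlockIndex(labelToIndex []int, l Label, idx int) []int {
	if len(labelToIndex) <= int(l) {
		labelToIndex = append(labelToIndex, make([]int, int(l)-len(labelToIndex)+1)...)
	}
	labelToIndex[l] = idx
	return labelToIndex
}

func main() {
	var labelToIndex []int
	labelToIndex = setBlockIndex(labelToIndex, 3, 42)
	fmt.Println(len(labelToIndex), labelToIndex[3]) // 4 42
}
```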
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
index b4450d56f..eacb6a7ef 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
@@ -60,9 +60,8 @@ type (
phiDefInstListPool wazevoapi.Pool[phiDefInstList]
// Followings are re-used during various places.
- blks []Block
- reals []RealReg
- currentOccupants regInUseSet
+ blks []Block
+ reals []RealReg
// Following two fields are updated while iterating the blocks in the reverse postorder.
state state
@@ -755,7 +754,8 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
killSet := a.reals[:0]
// Gather the set of registers that will be used in the current instruction.
- for _, use := range instr.Uses(&a.vs) {
+ uses := instr.Uses(&a.vs)
+ for _, use := range uses {
if use.IsRealReg() {
r := use.RealReg()
currentUsedSet = currentUsedSet.add(r)
@@ -770,7 +770,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
}
}
- for i, use := range instr.Uses(&a.vs) {
+ for i, use := range uses {
if !use.IsRealReg() {
vs := s.getVRegState(use.ID())
killed := vs.lastUse == pc
@@ -944,8 +944,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) {
s := &a.state
- for i := 0; i < 64; i++ {
- allocated := RealReg(i)
+ for allocated := RealReg(0); allocated < 64; allocated++ {
if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.
continue
}
@@ -974,11 +973,10 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
bID := blk.ID()
blkSt := a.getOrAllocateBlockState(bID)
desiredOccupants := &blkSt.startRegs
- aliveOnRegVRegs := make(map[VReg]RealReg)
- for i := 0; i < 64; i++ {
- r := RealReg(i)
- if v := blkSt.startRegs.get(r); v.Valid() {
- aliveOnRegVRegs[v] = r
+ var desiredOccupantsSet RegSet
+ for i, v := range desiredOccupants {
+ if v != VRegInvalid {
+ desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i))
}
}
@@ -987,56 +985,38 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
}
s.currentBlockID = bID
- a.updateLiveInVRState(a.getOrAllocateBlockState(bID))
+ a.updateLiveInVRState(blkSt)
- currentOccupants := &a.currentOccupants
for i := 0; i < preds; i++ {
- currentOccupants.reset()
if i == blkSt.startFromPredIndex {
continue
}
- currentOccupantsRev := make(map[VReg]RealReg)
pred := blk.Pred(i)
predSt := a.getOrAllocateBlockState(pred.ID())
- for ii := 0; ii < 64; ii++ {
- r := RealReg(ii)
- if v := predSt.endRegs.get(r); v.Valid() {
- if _, ok := aliveOnRegVRegs[v]; !ok {
- continue
- }
- currentOccupants.add(r, v)
- currentOccupantsRev[v] = r
- }
- }
s.resetAt(predSt)
// Finds the free registers if any.
intTmp, floatTmp := VRegInvalid, VRegInvalid
if intFree := s.findAllocatable(
- a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set,
+ a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupantsSet,
); intFree != RealRegInvalid {
intTmp = FromRealReg(intFree, RegTypeInt)
}
if floatFree := s.findAllocatable(
- a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set,
+ a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupantsSet,
); floatFree != RealRegInvalid {
floatTmp = FromRealReg(floatFree, RegTypeFloat)
}
- if wazevoapi.RegAllocLoggingEnabled {
- fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
- }
-
- for ii := 0; ii < 64; ii++ {
- r := RealReg(ii)
+ for r := RealReg(0); r < 64; r++ {
desiredVReg := desiredOccupants.get(r)
if !desiredVReg.Valid() {
continue
}
- currentVReg := currentOccupants.get(r)
+ currentVReg := s.regsInUse.get(r)
if desiredVReg.ID() == currentVReg.ID() {
continue
}
@@ -1048,86 +1028,95 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
} else {
tmpRealReg = floatTmp
}
- a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ)
+ a.reconcileEdge(f, r, pred, currentVReg, desiredVReg, tmpRealReg, typ)
}
}
}
+// reconcileEdge reconciles the register state between the current block and the predecessor for the real register `r`.
+//
+// - currentVReg is the current VReg value that sits on the register `r`. This can be VRegInvalid if the register is not used at the end of the predecessor.
+// - desiredVReg is the desired VReg value that should be on the register `r`.
+// - freeReg is the temporary register that can be used to swap the values, which may or may not be used.
+// - typ is the register type of `r`.
func (a *Allocator) reconcileEdge(f Function,
r RealReg,
pred Block,
- currentOccupants *regInUseSet,
- currentOccupantsRev map[VReg]RealReg,
currentVReg, desiredVReg VReg,
freeReg VReg,
typ RegType,
) {
+ // There are four cases to consider:
+ // 1. currentVReg is valid, but desiredVReg is on the stack.
+ // 2. Both currentVReg and desiredVReg are valid.
+ // 3. desiredVReg is on a different register than `r`, and currentVReg is not valid (i.e. `r` is unused).
+ // 4. desiredVReg is on the stack, and currentVReg is not valid.
+
s := &a.state
if currentVReg.Valid() {
- // Both are on reg.
- er, ok := currentOccupantsRev[desiredVReg]
- if !ok {
+ desiredState := s.getVRegState(desiredVReg.ID())
+ er := desiredState.r
+ if er == RealRegInvalid {
+ // Case 1: currentVReg is valid, but desiredVReg is on the stack.
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n",
desiredVReg.ID(), a.regInfo.RealRegName(r),
)
}
- // This case is that the desired value is on the stack, but currentVReg is on the target register.
- // We need to move the current value to the stack, and reload the desired value.
+ // We need to move the current value to the stack, and reload the desired value into the register.
// TODO: we can do better here.
f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion())
- delete(currentOccupantsRev, currentVReg)
+ s.releaseRealReg(r)
s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
- currentOccupants.add(r, desiredVReg)
- currentOccupantsRev[desiredVReg] = r
+ s.useRealReg(r, desiredVReg)
return
- }
-
- if wazevoapi.RegAllocLoggingEnabled {
- fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
- desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
+ } else {
+ // Case 2: Both currentVReg and desiredVReg are valid.
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n",
+ desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er),
+ )
+ }
+ // In this case, we need to swap the current and desired values between the two registers.
+ f.SwapBefore(
+ currentVReg.SetRealReg(r),
+ desiredVReg.SetRealReg(er),
+ freeReg,
+ pred.LastInstrForInsertion(),
)
- }
- f.SwapBefore(
- currentVReg.SetRealReg(r),
- desiredVReg.SetRealReg(er),
- freeReg,
- pred.LastInstrForInsertion(),
- )
- s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
- currentOccupantsRev[desiredVReg] = r
- currentOccupantsRev[currentVReg] = er
- currentOccupants.add(r, desiredVReg)
- currentOccupants.add(er, currentVReg)
- if wazevoapi.RegAllocLoggingEnabled {
- fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
+ s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg())
+ s.releaseRealReg(r)
+ s.releaseRealReg(er)
+ s.useRealReg(r, desiredVReg)
+ s.useRealReg(er, currentVReg)
+ if wazevoapi.RegAllocLoggingEnabled {
+ fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er))
+ }
}
} else {
- // Desired is on reg, but currently the target register is not used.
if wazevoapi.RegAllocLoggingEnabled {
fmt.Printf("\t\tv%d is desired to be on %s, current not used\n",
desiredVReg.ID(), a.regInfo.RealRegName(r),
)
}
- if currentReg, ok := currentOccupantsRev[desiredVReg]; ok {
+ if currentReg := s.getVRegState(desiredVReg.ID()).r; currentReg != RealRegInvalid {
+ // Case 3: desiredVReg is on a different register than `r`, while `r` itself is currently unused.
+ // We simply need to move the desired value to the register.
f.InsertMoveBefore(
FromRealReg(r, typ),
desiredVReg.SetRealReg(currentReg),
pred.LastInstrForInsertion(),
)
- currentOccupants.remove(currentReg)
+ s.releaseRealReg(currentReg)
} else {
+ // Case 4: currentVReg is not valid and desiredVReg is on the stack.
+ // We simply need to reload the desired value into the register.
s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
}
- currentOccupantsRev[desiredVReg] = r
- currentOccupants.add(r, desiredVReg)
- }
-
- if wazevoapi.RegAllocLoggingEnabled {
- fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
+ s.useRealReg(r, desiredVReg)
}
}
@@ -1169,8 +1158,7 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) {
}
for pos != definingBlk {
st := a.getOrAllocateBlockState(pos.ID())
- for ii := 0; ii < 64; ii++ {
- rr := RealReg(ii)
+ for rr := RealReg(0); rr < 64; rr++ {
if st.startRegs.get(rr) == v {
r = rr
// Already in the register, so we can place the spill at the beginning of the block.
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
index e9bf60661..04a8e8f4d 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
@@ -46,23 +46,24 @@ func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {
}
}
-type regInUseSet struct {
- set RegSet
- vrs [64]VReg
+type regInUseSet [64]VReg
+
+func newRegInUseSet() regInUseSet {
+ var ret regInUseSet
+ ret.reset()
+ return ret
}
func (rs *regInUseSet) reset() {
- rs.set = 0
- for i := range rs.vrs {
- rs.vrs[i] = VRegInvalid
+ for i := range rs {
+ rs[i] = VRegInvalid
}
}
func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
var ret []string
- for i := 0; i < 64; i++ {
- if rs.set&(1<<uint(i)) != 0 {
- vr := rs.vrs[i]
+ for i, vr := range rs {
+ if vr != VRegInvalid {
ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID()))
}
}
@@ -70,39 +71,28 @@ func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
}
func (rs *regInUseSet) has(r RealReg) bool {
- if r >= 64 {
- return false
- }
- return rs.set&(1<<uint(r)) != 0
+ return r < 64 && rs[r] != VRegInvalid
}
func (rs *regInUseSet) get(r RealReg) VReg {
- if r >= 64 {
- return VRegInvalid
- }
- return rs.vrs[r]
+ return rs[r]
}
func (rs *regInUseSet) remove(r RealReg) {
- if r >= 64 {
- return
- }
- rs.set &= ^(1 << uint(r))
- rs.vrs[r] = VRegInvalid
+ rs[r] = VRegInvalid
}
func (rs *regInUseSet) add(r RealReg, vr VReg) {
if r >= 64 {
return
}
- rs.set |= 1 << uint(r)
- rs.vrs[r] = vr
+ rs[r] = vr
}
func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) {
- for i := 0; i < 64; i++ {
- if rs.set&(1<<uint(i)) != 0 {
- f(RealReg(i), rs.vrs[i])
+ for i, vr := range rs {
+ if vr != VRegInvalid {
+ f(RealReg(i), vr)
}
}
}
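The regset.go change above collapses the separate presence bitset plus [64]VReg array into a single array in which an invalid sentinel marks a free slot. A self-contained sketch of that representation (names are made up):

```go
package main

import "fmt"

type vreg uint32

const vregInvalid vreg = 0 // sentinel meaning "slot is free" in this sketch

// inUse maps each of 64 hypothetical real registers to the virtual register
// occupying it; the sentinel replaces the separate presence bitset.
type inUse [64]vreg

func (s *inUse) reset() {
	for i := range s {
		s[i] = vregInvalid
	}
}

func (s *inUse) has(r int) bool { return r >= 0 && r < 64 && s[r] != vregInvalid }

func main() {
	var s inUse
	s.reset()
	s[3] = 7
	fmt.Println(s.has(3), s.has(4)) // true false
}
```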
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go
index 3379c4dde..72ce44e26 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go
@@ -2,7 +2,6 @@ package wazevo
import (
"context"
- "encoding/binary"
"fmt"
"reflect"
"runtime"
@@ -310,15 +309,6 @@ func (c *callEngine) callWithStack(ctx context.Context, paramResultStack []uint6
*argRes = uint64(0xffffffff) // = -1 in signed 32-bit integer.
} else {
*argRes = uint64(res)
- calleeOpaque := opaqueViewFromPtr(uintptr(unsafe.Pointer(c.execCtx.callerModuleContextPtr)))
- if mod.Source.MemorySection != nil { // Local memory.
- putLocalMemory(calleeOpaque, 8 /* local memory begins at 8 */, mem)
- } else {
- // Imported memory's owner at offset 16 of the callerModuleContextPtr.
- opaquePtr := uintptr(binary.LittleEndian.Uint64(calleeOpaque[16:]))
- importedMemOwner := opaqueViewFromPtr(opaquePtr)
- putLocalMemory(importedMemOwner, 8 /* local memory begins at 8 */, mem)
- }
}
c.execCtx.exitCode = wazevoapi.ExitCodeOK
afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall)
@@ -525,14 +515,6 @@ func (c *callEngine) callerModuleInstance() *wasm.ModuleInstance {
return moduleInstanceFromOpaquePtr(c.execCtx.callerModuleContextPtr)
}
-func opaqueViewFromPtr(ptr uintptr) []byte {
- var opaque []byte
- sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaque))
- sh.Data = ptr
- setSliceLimits(sh, 24, 24)
- return opaque
-}
-
const callStackCeiling = uintptr(50000000) // in uint64 (8 bytes) == 400000000 bytes in total == 400mb.
func (c *callEngine) growStackWithGuarded() (newSP uintptr, newFP uintptr, err error) {
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go
index f7c0450ae..e49353dc8 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go
@@ -31,6 +31,13 @@ func fileCacheKey(m *wasm.Module) (ret filecache.Key) {
s := sha256.New()
s.Write(m.ID[:])
s.Write(magic)
+ // Write the CPU features so that the cached module is only reused on a CPU with the same features.
+ // This prevents a module compiled with incompatible CPU features from being loaded from the cache.
+ cpu := platform.CpuFeatures.Raw()
+ // Reuse the `ret` buffer as scratch space for the 8-byte CPU feature bitset to avoid an extra allocation.
+ binary.LittleEndian.PutUint64(ret[:8], cpu)
+ s.Write(ret[:8])
+ // Finally, write the hash to the ret buffer.
s.Sum(ret[:0])
return
}
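The engine_cache.go hunk above mixes the CPU feature bitset into the file-cache key so an artifact compiled with one feature set is not reused on another CPU. A minimal sketch of the same keying idea (not the actual wazero key layout):

```go
package main

import (
	"crypto/sha256"
	"encoding/binary"
	"fmt"
)

// cacheKey mixes a module identifier with a raw CPU-feature bitset, so a
// compiled artifact is only reused on a machine exposing the same features.
func cacheKey(moduleID []byte, cpuFeatures uint64) [sha256.Size]byte {
	h := sha256.New()
	h.Write(moduleID)
	var buf [8]byte
	binary.LittleEndian.PutUint64(buf[:], cpuFeatures)
	h.Write(buf[:])
	var key [sha256.Size]byte
	h.Sum(key[:0])
	return key
}

func main() {
	fmt.Printf("%x\n", cacheKey([]byte("example-module"), 0b1011))
}
```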
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go
index 873a35a55..42cc21dcd 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go
@@ -301,26 +301,7 @@ func (c *Compiler) declareWasmLocals(entry ssa.BasicBlock) {
st := WasmTypeToSSAType(typ)
variable := c.ssaBuilder.DeclareVariable(st)
c.setWasmLocalVariable(wasm.Index(i)+localCount, variable)
-
- zeroInst := c.ssaBuilder.AllocateInstruction()
- switch st {
- case ssa.TypeI32:
- zeroInst.AsIconst32(0)
- case ssa.TypeI64:
- zeroInst.AsIconst64(0)
- case ssa.TypeF32:
- zeroInst.AsF32const(0)
- case ssa.TypeF64:
- zeroInst.AsF64const(0)
- case ssa.TypeV128:
- zeroInst.AsVconst(0, 0)
- default:
- panic("TODO: " + wasm.ValueTypeName(typ))
- }
-
- c.ssaBuilder.InsertInstruction(zeroInst)
- value := zeroInst.Return()
- c.ssaBuilder.DefineVariable(variable, value, entry)
+ c.ssaBuilder.InsertZeroValue(st)
}
}
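The frontend.go hunk above stops emitting one zero constant per local and instead asks the builder for a shared, lazily created zero value per type (InsertZeroValue in builder.go further below). A toy sketch of that memoization, with illustrative types and value IDs standing in for SSA values:

```go
package main

import "fmt"

type typ int

const (
	typeI32 typ = iota
	typeI64
	typeEnd // sentinel sizing the per-type array, mirroring typeEnd in the diff
)

// zeroCache creates at most one zero constant per type and reuses it.
type zeroCache struct {
	zeros  [typeEnd]int // 0 means "not created yet" in this sketch
	nextID int
}

func (c *zeroCache) zeroValue(t typ) int {
	if c.zeros[t] != 0 {
		return c.zeros[t]
	}
	c.nextID++ // pretend this inserts a fresh zero-constant instruction
	c.zeros[t] = c.nextID
	return c.zeros[t]
}

func main() {
	var c zeroCache
	fmt.Println(c.zeroValue(typeI32), c.zeroValue(typeI32), c.zeroValue(typeI64)) // 1 1 2
}
```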
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go
index 5096a6365..ff963e605 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go
@@ -1086,16 +1086,8 @@ func (c *Compiler) lowerCurrentOpcode() {
break
}
variable := c.localVariable(index)
- if _, ok := c.m.NonStaticLocals[c.wasmLocalFunctionIndex][index]; ok {
- state.push(builder.MustFindValue(variable))
- } else {
- // If a local is static, we can simply find it in the entry block which is either a function param
- // or a zero value. This fast pass helps to avoid the overhead of searching the entire function plus
- // avoid adding unnecessary block arguments.
- // TODO: I think this optimization should be done in a SSA pass like passRedundantPhiEliminationOpt,
- // but somehow there's some corner cases that it fails to optimize.
- state.push(builder.MustFindValueInBlk(variable, c.ssaBuilder.EntryBlock()))
- }
+ state.push(builder.MustFindValue(variable))
+
case wasm.OpcodeLocalSet:
index := c.readI32u()
if state.unreachable {
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go
index ba8f546c0..efa1b9bba 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go
@@ -86,16 +86,6 @@ func newAlignedOpaque(size int) moduleContextOpaque {
return buf
}
-func putLocalMemory(opaque []byte, offset wazevoapi.Offset, mem *wasm.MemoryInstance) {
- s := uint64(len(mem.Buffer))
- var b uint64
- if len(mem.Buffer) > 0 {
- b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0])))
- }
- binary.LittleEndian.PutUint64(opaque[offset:], b)
- binary.LittleEndian.PutUint64(opaque[offset+8:], s)
-}
-
func (m *moduleEngine) setupOpaque() {
inst := m.module
offsets := &m.parent.offsets
@@ -106,7 +96,7 @@ func (m *moduleEngine) setupOpaque() {
)
if lm := offsets.LocalMemoryBegin; lm >= 0 {
- putLocalMemory(opaque, lm, inst.MemoryInstance)
+ m.putLocalMemory()
}
// Note: imported memory is resolved in ResolveImportedFunction.
@@ -227,6 +217,25 @@ func (m *moduleEngine) SetGlobalValue(i wasm.Index, lo, hi uint64) {
// OwnsGlobals implements the same method as documented on wasm.ModuleEngine.
func (m *moduleEngine) OwnsGlobals() bool { return true }
+// MemoryGrown implements wasm.ModuleEngine.
+func (m *moduleEngine) MemoryGrown() {
+ m.putLocalMemory()
+}
+
+// putLocalMemory writes the local memory buffer pointer and length to the opaque buffer.
+func (m *moduleEngine) putLocalMemory() {
+ mem := m.module.MemoryInstance
+ offset := m.parent.offsets.LocalMemoryBegin
+
+ s := uint64(len(mem.Buffer))
+ var b uint64
+ if len(mem.Buffer) > 0 {
+ b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0])))
+ }
+ binary.LittleEndian.PutUint64(m.opaque[offset:], b)
+ binary.LittleEndian.PutUint64(m.opaque[offset+8:], s)
+}
+
// ResolveImportedFunction implements wasm.ModuleEngine.
func (m *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) {
executableOffset, moduleCtxOffset, typeIDOffset := m.parent.offsets.ImportedFunctionOffset(index)
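The module_engine.go hunk above moves the buffer-pointer/length write into a method so it can be redone whenever the memory grows. A standalone sketch of the write itself (the offsets and layout are illustrative, not wazero's opaque module-context layout):

```go
package main

import (
	"encoding/binary"
	"fmt"
	"unsafe"
)

// putMemoryView writes a buffer's base address and length into an opaque byte
// region at the given offset, little-endian.
func putMemoryView(opaque []byte, offset int, buf []byte) {
	var base uint64
	if len(buf) > 0 {
		base = uint64(uintptr(unsafe.Pointer(&buf[0])))
	}
	binary.LittleEndian.PutUint64(opaque[offset:], base)
	binary.LittleEndian.PutUint64(opaque[offset+8:], uint64(len(buf)))
}

func main() {
	opaque := make([]byte, 32)
	mem := make([]byte, 65536)
	putMemoryView(opaque, 8, mem)
	fmt.Println(binary.LittleEndian.Uint64(opaque[16:])) // 65536
}
```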
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go
index 10b6b4b62..39627b989 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go
@@ -49,21 +49,12 @@ type BasicBlock interface {
// ReturnBlock returns true if this block represents the function return.
ReturnBlock() bool
- // FormatHeader returns the debug string of this block, not including instruction.
- FormatHeader(b Builder) string
-
// Valid is true if this block is still valid even after optimizations.
Valid() bool
// Sealed is true if this block has been sealed.
Sealed() bool
- // BeginPredIterator returns the first predecessor of this block.
- BeginPredIterator() BasicBlock
-
- // NextPredIterator returns the next predecessor of this block.
- NextPredIterator() BasicBlock
-
// Preds returns the number of predecessors of this block.
Preds() int
@@ -88,10 +79,11 @@ type (
basicBlock struct {
id BasicBlockID
rootInstr, currentInstr *Instruction
- params []blockParam
- predIter int
- preds []basicBlockPredecessorInfo
- success []*basicBlock
+ // params are Values that represent parameters to a basicBlock.
+ // Each parameter can be considered as the output of a PHI instruction in traditional SSA.
+ params []Value
+ preds []basicBlockPredecessorInfo
+ success []*basicBlock
// singlePred is the alias to preds[0] for fast lookup, and only set after Seal is called.
singlePred *basicBlock
// lastDefinitions maps Variable to its last definition in this block.
@@ -116,11 +108,14 @@ type (
// loopNestingForestChildren holds the children of this block in the loop nesting forest.
// Non-empty if and only if this block is a loop header (i.e. loopHeader=true)
- loopNestingForestChildren []BasicBlock
+ loopNestingForestChildren wazevoapi.VarLength[BasicBlock]
// reversePostOrder is used to sort all the blocks in the function in reverse post order.
// This is used in builder.LayoutBlocks.
- reversePostOrder int
+ reversePostOrder int32
+
+ // visited is used during various traversals.
+ visited int32
// child and sibling are the ones in the dominator tree.
child, sibling *basicBlock
@@ -128,15 +123,6 @@ type (
// BasicBlockID is the unique ID of a basicBlock.
BasicBlockID uint32
- // blockParam implements Value and represents a parameter to a basicBlock.
- blockParam struct {
- // value is the Value that corresponds to the parameter in this block,
- // and can be considered as an output of PHI instruction in traditional SSA.
- value Value
- // typ is the type of the parameter.
- typ Type
- }
-
unknownValue struct {
// variable is the variable that this unknownValue represents.
variable Variable
@@ -145,6 +131,9 @@ type (
}
)
+// basicBlockVarLengthNil is the default nil value for basicBlock.loopNestingForestChildren.
+var basicBlockVarLengthNil = wazevoapi.NewNilVarLength[BasicBlock]()
+
const basicBlockIDReturnBlock = 0xffffffff
// Name implements BasicBlock.Name.
@@ -190,13 +179,13 @@ func (bb *basicBlock) ReturnBlock() bool {
// AddParam implements BasicBlock.AddParam.
func (bb *basicBlock) AddParam(b Builder, typ Type) Value {
paramValue := b.allocateValue(typ)
- bb.params = append(bb.params, blockParam{typ: typ, value: paramValue})
+ bb.params = append(bb.params, paramValue)
return paramValue
}
// addParamOn adds a parameter to this block whose value is already allocated.
-func (bb *basicBlock) addParamOn(typ Type, value Value) {
- bb.params = append(bb.params, blockParam{typ: typ, value: value})
+func (bb *basicBlock) addParamOn(value Value) {
+ bb.params = append(bb.params, value)
}
// Params implements BasicBlock.Params.
@@ -206,8 +195,7 @@ func (bb *basicBlock) Params() int {
// Param implements BasicBlock.Param.
func (bb *basicBlock) Param(i int) Value {
- p := &bb.params[i]
- return p.value
+ return bb.params[i]
}
// Valid implements BasicBlock.Valid.
@@ -248,22 +236,6 @@ func (bb *basicBlock) NumPreds() int {
return len(bb.preds)
}
-// BeginPredIterator implements BasicBlock.BeginPredIterator.
-func (bb *basicBlock) BeginPredIterator() BasicBlock {
- bb.predIter = 0
- return bb.NextPredIterator()
-}
-
-// NextPredIterator implements BasicBlock.NextPredIterator.
-func (bb *basicBlock) NextPredIterator() BasicBlock {
- if bb.predIter >= len(bb.preds) {
- return nil
- }
- pred := bb.preds[bb.predIter].blk
- bb.predIter++
- return pred
-}
-
// Preds implements BasicBlock.Preds.
func (bb *basicBlock) Preds() int {
return len(bb.preds)
@@ -305,7 +277,8 @@ func resetBasicBlock(bb *basicBlock) {
bb.unknownValues = bb.unknownValues[:0]
bb.lastDefinitions = wazevoapi.ResetMap(bb.lastDefinitions)
bb.reversePostOrder = -1
- bb.loopNestingForestChildren = bb.loopNestingForestChildren[:0]
+ bb.visited = 0
+ bb.loopNestingForestChildren = basicBlockVarLengthNil
bb.loopHeader = false
bb.sibling = nil
bb.child = nil
@@ -335,11 +308,11 @@ func (bb *basicBlock) addPred(blk BasicBlock, branch *Instruction) {
pred.success = append(pred.success, bb)
}
-// FormatHeader implements BasicBlock.FormatHeader.
-func (bb *basicBlock) FormatHeader(b Builder) string {
+// formatHeader returns the string representation of the header of the basicBlock.
+func (bb *basicBlock) formatHeader(b Builder) string {
ps := make([]string, len(bb.params))
for i, p := range bb.params {
- ps[i] = p.value.formatWithType(b)
+ ps[i] = p.formatWithType(b)
}
if len(bb.preds) > 0 {
@@ -398,7 +371,7 @@ func (bb *basicBlock) String() string {
// LoopNestingForestChildren implements BasicBlock.LoopNestingForestChildren.
func (bb *basicBlock) LoopNestingForestChildren() []BasicBlock {
- return bb.loopNestingForestChildren
+ return bb.loopNestingForestChildren.View()
}
// LoopHeader implements BasicBlock.LoopHeader.
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go
index 1fc84d2ea..0b700c4b1 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go
@@ -54,9 +54,6 @@ type Builder interface {
// MustFindValue searches the latest definition of the given Variable and returns the result.
MustFindValue(variable Variable) Value
- // MustFindValueInBlk is the same as MustFindValue except it searches the latest definition from the given BasicBlock.
- MustFindValueInBlk(variable Variable, blk BasicBlock) Value
-
// FindValueInLinearPath tries to find the latest definition of the given Variable in the linear path to the current BasicBlock.
// If it cannot find the definition, or it's not sealed yet, it returns ValueInvalid.
FindValueInLinearPath(variable Variable) Value
@@ -127,7 +124,11 @@ type Builder interface {
// Idom returns the immediate dominator of the given BasicBlock.
Idom(blk BasicBlock) BasicBlock
+ // VarLengthPool returns the VarLengthPool of Value.
VarLengthPool() *wazevoapi.VarLengthPool[Value]
+
+ // InsertZeroValue inserts a zero value constant instruction of the given type.
+ InsertZeroValue(t Type)
}
// NewBuilder returns a new Builder implementation.
@@ -135,10 +136,10 @@ func NewBuilder() Builder {
return &builder{
instructionsPool: wazevoapi.NewPool[Instruction](resetInstruction),
basicBlocksPool: wazevoapi.NewPool[basicBlock](resetBasicBlock),
+ varLengthBasicBlockPool: wazevoapi.NewVarLengthPool[BasicBlock](),
varLengthPool: wazevoapi.NewVarLengthPool[Value](),
valueAnnotations: make(map[ValueID]string),
signatures: make(map[SignatureID]*Signature),
- blkVisited: make(map[*basicBlock]int),
valueIDAliases: make(map[ValueID]Value),
redundantParameterIndexToValue: make(map[int]Value),
returnBlk: &basicBlock{id: basicBlockIDReturnBlock},
@@ -177,12 +178,13 @@ type builder struct {
dominators []*basicBlock
sparseTree dominatorSparseTree
+ varLengthBasicBlockPool wazevoapi.VarLengthPool[BasicBlock]
+
// loopNestingForestRoots are the roots of the loop nesting forest.
loopNestingForestRoots []BasicBlock
// The followings are used for optimization passes/deterministic compilation.
instStack []*Instruction
- blkVisited map[*basicBlock]int
valueIDToInstruction []*Instruction
blkStack []*basicBlock
blkStack2 []*basicBlock
@@ -200,6 +202,32 @@ type builder struct {
donePostBlockLayoutPasses bool
currentSourceOffset SourceOffset
+
+ // zeros are the zero value constants for each type.
+ zeros [typeEnd]Value
+}
+
+// InsertZeroValue implements Builder.InsertZeroValue.
+func (b *builder) InsertZeroValue(t Type) {
+ if b.zeros[t].Valid() {
+ return
+ }
+ zeroInst := b.AllocateInstruction()
+ switch t {
+ case TypeI32:
+ zeroInst.AsIconst32(0)
+ case TypeI64:
+ zeroInst.AsIconst64(0)
+ case TypeF32:
+ zeroInst.AsF32const(0)
+ case TypeF64:
+ zeroInst.AsF64const(0)
+ case TypeV128:
+ zeroInst.AsVconst(0, 0)
+ default:
+ panic("TODO: " + t.String())
+ }
+ b.zeros[t] = zeroInst.Insert(b).Return()
}
func (b *builder) VarLengthPool() *wazevoapi.VarLengthPool[Value] {
@@ -215,10 +243,12 @@ func (b *builder) ReturnBlock() BasicBlock {
func (b *builder) Init(s *Signature) {
b.nextVariable = 0
b.currentSignature = s
+ b.zeros = [typeEnd]Value{ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid}
resetBasicBlock(b.returnBlk)
b.instructionsPool.Reset()
b.basicBlocksPool.Reset()
b.varLengthPool.Reset()
+ b.varLengthBasicBlockPool.Reset()
b.donePreBlockLayoutPasses = false
b.doneBlockLayout = false
b.donePostBlockLayoutPasses = false
@@ -231,11 +261,6 @@ func (b *builder) Init(s *Signature) {
b.blkStack2 = b.blkStack2[:0]
b.dominators = b.dominators[:0]
b.loopNestingForestRoots = b.loopNestingForestRoots[:0]
-
- for i := 0; i < b.basicBlocksPool.Allocated(); i++ {
- blk := b.basicBlocksPool.View(i)
- delete(b.blkVisited, blk)
- }
b.basicBlocksPool.Reset()
for v := ValueID(0); v < b.nextValueID; v++ {
@@ -448,11 +473,6 @@ func (b *builder) findValueInLinearPath(variable Variable, blk *basicBlock) Valu
return ValueInvalid
}
-func (b *builder) MustFindValueInBlk(variable Variable, blk BasicBlock) Value {
- typ := b.definedVariableType(variable)
- return b.findValue(typ, variable, blk.(*basicBlock))
-}
-
// MustFindValue implements Builder.MustFindValue.
func (b *builder) MustFindValue(variable Variable) Value {
typ := b.definedVariableType(variable)
@@ -482,6 +502,9 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value
value: value,
})
return value
+ } else if blk.EntryBlock() {
+ // If this is the entry block, the variable is uninitialized here, so it takes the zero value of its type.
+ return b.zeros[b.definedVariableType(variable)]
}
if pred := blk.singlePred; pred != nil {
@@ -495,21 +518,42 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value
// If this block has multiple predecessors, we have to gather the definitions,
// and treat them as an argument to this block.
//
- // The first thing is to define a new parameter to this block which may or may not be redundant, but
- // later we eliminate trivial params in an optimization pass. This must be done before finding the
- // definitions in the predecessors so that we can break the cycle.
- paramValue := blk.AddParam(b, typ)
- b.DefineVariable(variable, paramValue, blk)
-
- // After the new param is added, we have to manipulate the original branching instructions
- // in predecessors so that they would pass the definition of `variable` as the argument to
- // the newly added PHI.
+ // But before that, we have to check if the possible definitions are the same Value.
+ tmpValue := b.allocateValue(typ)
+ // Break the cycle by defining the variable with the tmpValue.
+ b.DefineVariable(variable, tmpValue, blk)
+ // Check whether all the predecessors have the same definition.
+ uniqueValue := ValueInvalid
for i := range blk.preds {
- pred := &blk.preds[i]
- value := b.findValue(typ, variable, pred.blk)
- pred.branch.addArgumentBranchInst(b, value)
+ predValue := b.findValue(typ, variable, blk.preds[i].blk)
+ if uniqueValue == ValueInvalid {
+ uniqueValue = predValue
+ } else if uniqueValue != predValue {
+ uniqueValue = ValueInvalid
+ break
+ }
+ }
+
+ if uniqueValue != ValueInvalid {
+ // If all the predecessors have the same definition, we can use that value.
+ b.DefineVariable(variable, uniqueValue, blk)
+ b.alias(tmpValue, uniqueValue)
+ return uniqueValue
+ } else {
+ // Otherwise, add the tmpValue to this block as a parameter, which may or may not be redundant;
+ // trivial params are eliminated later in an optimization pass. The potential cycle was already
+ // broken above by temporarily defining the variable as tmpValue before searching the predecessors.
+ blk.addParamOn(tmpValue)
+ // After the new param is added, we have to manipulate the original branching instructions
+ // in predecessors so that they would pass the definition of `variable` as the argument to
+ // the newly added PHI.
+ for i := range blk.preds {
+ pred := &blk.preds[i]
+ value := b.findValue(typ, variable, pred.blk)
+ pred.branch.addArgumentBranchInst(b, value)
+ }
+ return tmpValue
}
- return paramValue
}
// Seal implements Builder.Seal.
@@ -523,7 +567,7 @@ func (b *builder) Seal(raw BasicBlock) {
for _, v := range blk.unknownValues {
variable, phiValue := v.variable, v.value
typ := b.definedVariableType(variable)
- blk.addParamOn(typ, phiValue)
+ blk.addParamOn(phiValue)
for i := range blk.preds {
pred := &blk.preds[i]
predValue := b.findValue(typ, variable, pred.blk)
@@ -566,7 +610,7 @@ func (b *builder) Format() string {
}
for bb := iterBegin(); bb != nil; bb = iterNext() {
str.WriteByte('\n')
- str.WriteString(bb.FormatHeader(b))
+ str.WriteString(bb.formatHeader(b))
str.WriteByte('\n')
for cur := bb.Root(); cur != nil; cur = cur.Next() {
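The builder.go hunk above first checks whether every predecessor already yields the same value before turning the variable into a block parameter (PHI). That pre-check reduces to a small helper, sketched here with plain ints standing in for SSA values:

```go
package main

import "fmt"

const valueInvalid = -1 // stand-in for ssa.ValueInvalid

// uniquePredValue returns the single value all predecessors agree on, or
// valueInvalid if they differ; only in the latter case does the builder need
// a real block parameter (PHI).
func uniquePredValue(predValues []int) int {
	unique := valueInvalid
	for _, v := range predValues {
		if unique == valueInvalid {
			unique = v
		} else if unique != v {
			return valueInvalid
		}
	}
	return unique
}

func main() {
	fmt.Println(uniquePredValue([]int{3, 3, 3})) // 3: alias to the common value, no PHI
	fmt.Println(uniquePredValue([]int{3, 5}))    // -1: a block parameter is required
}
```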
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go
index a2e986cd1..89ec34b7e 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go
@@ -22,9 +22,9 @@ func (b *builder) RunPasses() {
func (b *builder) runPreBlockLayoutPasses() {
passSortSuccessors(b)
passDeadBlockEliminationOpt(b)
- passRedundantPhiEliminationOpt(b)
// The result of passCalculateImmediateDominators will be used by various passes below.
passCalculateImmediateDominators(b)
+ passRedundantPhiEliminationOpt(b)
passNopInstElimination(b)
// TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic.
@@ -78,12 +78,11 @@ func (b *builder) runFinalizingPasses() {
// passDeadBlockEliminationOpt searches the unreachable blocks, and sets the basicBlock.invalid flag true if so.
func passDeadBlockEliminationOpt(b *builder) {
entryBlk := b.entryBlk()
- b.clearBlkVisited()
b.blkStack = append(b.blkStack, entryBlk)
for len(b.blkStack) > 0 {
reachableBlk := b.blkStack[len(b.blkStack)-1]
b.blkStack = b.blkStack[:len(b.blkStack)-1]
- b.blkVisited[reachableBlk] = 0 // the value won't be used in this pass.
+ reachableBlk.visited = 1
if !reachableBlk.sealed && !reachableBlk.ReturnBlock() {
panic(fmt.Sprintf("%s is not sealed", reachableBlk))
@@ -94,7 +93,7 @@ func passDeadBlockEliminationOpt(b *builder) {
}
for _, succ := range reachableBlk.success {
- if _, ok := b.blkVisited[succ]; ok {
+ if succ.visited == 1 {
continue
}
b.blkStack = append(b.blkStack, succ)
@@ -102,13 +101,16 @@ func passDeadBlockEliminationOpt(b *builder) {
}
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
- if _, ok := b.blkVisited[blk]; !ok {
+ if blk.visited != 1 {
blk.invalid = true
}
+ blk.visited = 0
}
}
// passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block).
+// This requires the reverse post-order traversal to be calculated before calling this function,
+// hence passCalculateImmediateDominators must be called before this.
func passRedundantPhiEliminationOpt(b *builder) {
redundantParameterIndexes := b.ints[:0] // reuse the slice from previous iterations.
@@ -118,15 +120,18 @@ func passRedundantPhiEliminationOpt(b *builder) {
// relatively small. For example, sqlite speedtest binary results in the large number of redundant PHIs,
// the maximum number of iteration was 22, which seems to be acceptable but not that small either since the
// complexity here is O(BlockNum * Iterations) at the worst case where BlockNum might be the order of thousands.
+ // -- Note --
+ // Each iteration may visit the blocks in any order, but empirically it converges quickly when the
+ // blocks are visited in reverse post-order. It might be possible to optimize this further by using the dominator tree.
for {
changed := false
- _ = b.blockIteratorBegin() // skip entry block!
+ _ = b.blockIteratorReversePostOrderBegin() // skip entry block!
// Below, we intentionally use the named iteration variable name, as this comes with inevitable nested for loops!
- for blk := b.blockIteratorNext(); blk != nil; blk = b.blockIteratorNext() {
+ for blk := b.blockIteratorReversePostOrderNext(); blk != nil; blk = b.blockIteratorReversePostOrderNext() {
paramNum := len(blk.params)
for paramIndex := 0; paramIndex < paramNum; paramIndex++ {
- phiValue := blk.params[paramIndex].value
+ phiValue := blk.params[paramIndex]
redundant := true
nonSelfReferencingValue := ValueInvalid
@@ -184,7 +189,7 @@ func passRedundantPhiEliminationOpt(b *builder) {
// Still need to have the definition of the value of the PHI (previously as the parameter).
for _, redundantParamIndex := range redundantParameterIndexes {
- phiValue := blk.params[redundantParamIndex].value
+ phiValue := blk.params[redundantParamIndex]
onlyValue := b.redundantParameterIndexToValue[redundantParamIndex]
// Create an alias in this block from the only phi argument to the phi value.
b.alias(phiValue, onlyValue)
@@ -227,10 +232,10 @@ func passRedundantPhiEliminationOpt(b *builder) {
func passDeadCodeEliminationOpt(b *builder) {
nvid := int(b.nextValueID)
if nvid >= len(b.valueRefCounts) {
- b.valueRefCounts = append(b.valueRefCounts, make([]int, b.nextValueID)...)
+ b.valueRefCounts = append(b.valueRefCounts, make([]int, nvid-len(b.valueRefCounts)+1)...)
}
if nvid >= len(b.valueIDToInstruction) {
- b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...)
+ b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, nvid-len(b.valueIDToInstruction)+1)...)
}
// First, we gather all the instructions with side effects.
@@ -350,22 +355,10 @@ func (b *builder) incRefCount(id ValueID, from *Instruction) {
b.valueRefCounts[id]++
}
-// clearBlkVisited clears the b.blkVisited map so that we can reuse it for multiple places.
-func (b *builder) clearBlkVisited() {
- b.blkStack2 = b.blkStack2[:0]
- for key := range b.blkVisited {
- b.blkStack2 = append(b.blkStack2, key)
- }
- for _, blk := range b.blkStack2 {
- delete(b.blkVisited, blk)
- }
- b.blkStack2 = b.blkStack2[:0]
-}
-
// passNopInstElimination eliminates the instructions which is essentially a no-op.
func passNopInstElimination(b *builder) {
if int(b.nextValueID) >= len(b.valueIDToInstruction) {
- b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...)
+ b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, int(b.nextValueID)-len(b.valueIDToInstruction)+1)...)
}
for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() {
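The pass.go hunks above replace the shared blkVisited map with a per-block visited flag for the reachability walk. A minimal sketch of the same flag-based DFS over a simplified block type:

```go
package main

import "fmt"

// block carries its own visited flag, so clearing a shared map between passes
// is no longer necessary (the flag is reset per pass instead).
type block struct {
	succ    []*block
	visited bool
}

// markReachable flags every block reachable from entry using an explicit stack.
func markReachable(entry *block) {
	stack := []*block{entry}
	entry.visited = true
	for len(stack) > 0 {
		b := stack[len(stack)-1]
		stack = stack[:len(stack)-1]
		for _, s := range b.succ {
			if !s.visited {
				s.visited = true
				stack = append(stack, s)
			}
		}
	}
}

func main() {
	a, b, c := &block{}, &block{}, &block{}
	a.succ = []*block{b}
	markReachable(a)
	fmt.Println(a.visited, b.visited, c.visited) // true true false: c would be marked invalid
}
```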
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go
index 9068180a0..584b5eade 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go
@@ -23,8 +23,6 @@ import (
//
// This heuristic is done in maybeInvertBranches function.
func passLayoutBlocks(b *builder) {
- b.clearBlkVisited()
-
// We might end up splitting critical edges which adds more basic blocks,
// so we store the currently existing basic blocks in nonSplitBlocks temporarily.
// That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks.
@@ -47,20 +45,20 @@ func passLayoutBlocks(b *builder) {
for _, blk := range nonSplitBlocks {
for i := range blk.preds {
pred := blk.preds[i].blk
- if _, ok := b.blkVisited[pred]; ok || !pred.Valid() {
+ if pred.visited == 1 || !pred.Valid() {
continue
} else if pred.reversePostOrder < blk.reversePostOrder {
// This means the edge is critical, and this pred is the trampoline and yet to be inserted.
// Split edge trampolines must come before the destination in reverse post-order.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred)
- b.blkVisited[pred] = 0 // mark as inserted, the value is not used.
+ pred.visited = 1 // mark as inserted.
}
}
// Now that we've already added all the potential trampoline blocks incoming to this block,
// we can add this block itself.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk)
- b.blkVisited[blk] = 0 // mark as inserted, the value is not used.
+ blk.visited = 1 // mark as inserted.
if len(blk.success) < 2 {
// There won't be critical edge originating from this block.
@@ -116,7 +114,7 @@ func passLayoutBlocks(b *builder) {
if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline {
// This can be lowered as fallthrough at the end of the block.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline)
- b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used.
+ trampoline.visited = 1 // mark as inserted.
} else {
uninsertedTrampolines = append(uninsertedTrampolines, trampoline)
}
@@ -126,7 +124,7 @@ func passLayoutBlocks(b *builder) {
if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself.
// This means the critical edge was backward, so we insert after the current block immediately.
b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline)
- b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used.
+ trampoline.visited = 1 // mark as inserted.
} // If the target is forward, we can wait to insert until the target is inserted.
}
uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block.
@@ -142,8 +140,8 @@ func passLayoutBlocks(b *builder) {
if wazevoapi.SSAValidationEnabled {
for _, trampoline := range trampolines {
- if _, ok := b.blkVisited[trampoline]; !ok {
- panic("BUG: trampoline block not inserted: " + trampoline.FormatHeader(b))
+ if trampoline.visited != 1 {
+ panic("BUG: trampoline block not inserted: " + trampoline.formatHeader(b))
}
trampoline.validate(b)
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go
index 50cb9c475..e8288c4bd 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go
@@ -15,10 +15,6 @@ import (
// At the last of pass, this function also does the loop detection and sets the basicBlock.loop flag.
func passCalculateImmediateDominators(b *builder) {
reversePostOrder := b.reversePostOrderedBasicBlocks[:0]
- exploreStack := b.blkStack[:0]
- b.clearBlkVisited()
-
- entryBlk := b.entryBlk()
// Store the reverse postorder from the entrypoint into reversePostOrder slice.
// This calculation of reverse postorder is not described in the paper,
@@ -28,14 +24,17 @@ func passCalculateImmediateDominators(b *builder) {
// which is a reasonable assumption as long as SSA Builder is properly used.
//
// First we push blocks in postorder iteratively visit successors of the entry block.
- exploreStack = append(exploreStack, entryBlk)
+ entryBlk := b.entryBlk()
+ exploreStack := append(b.blkStack[:0], entryBlk)
+ // These flags are used to track the state of the block in the DFS traversal.
+ // We temporarily use the visited field to store the state.
const visitStateUnseen, visitStateSeen, visitStateDone = 0, 1, 2
- b.blkVisited[entryBlk] = visitStateSeen
+ entryBlk.visited = visitStateSeen
for len(exploreStack) > 0 {
tail := len(exploreStack) - 1
blk := exploreStack[tail]
exploreStack = exploreStack[:tail]
- switch b.blkVisited[blk] {
+ switch blk.visited {
case visitStateUnseen:
// This is likely a bug in the frontend.
panic("BUG: unsupported CFG")
@@ -48,16 +47,18 @@ func passCalculateImmediateDominators(b *builder) {
if succ.ReturnBlock() || succ.invalid {
continue
}
- if b.blkVisited[succ] == visitStateUnseen {
- b.blkVisited[succ] = visitStateSeen
+ if succ.visited == visitStateUnseen {
+ succ.visited = visitStateSeen
exploreStack = append(exploreStack, succ)
}
}
// Finally, we could pop this block once we pop all of its successors.
- b.blkVisited[blk] = visitStateDone
+ blk.visited = visitStateDone
case visitStateDone:
// Note: at this point we push blk in postorder despite its name.
reversePostOrder = append(reversePostOrder, blk)
+ default:
+ panic("BUG")
}
}
// At this point, reversePostOrder has postorder actually, so we reverse it.
@@ -67,7 +68,7 @@ func passCalculateImmediateDominators(b *builder) {
}
for i, blk := range reversePostOrder {
- blk.reversePostOrder = i
+ blk.reversePostOrder = int32(i)
}
// Reuse the dominators slice if possible from the previous computation of function.
@@ -180,7 +181,7 @@ func passBuildLoopNestingForest(b *builder) {
b.loopNestingForestRoots = append(b.loopNestingForestRoots, blk)
} else if n == ent {
} else if n.loopHeader {
- n.loopNestingForestChildren = append(n.loopNestingForestChildren, blk)
+ n.loopNestingForestChildren = n.loopNestingForestChildren.Append(&b.varLengthBasicBlockPool, blk)
}
}
@@ -193,7 +194,7 @@ func passBuildLoopNestingForest(b *builder) {
func printLoopNestingForest(root *basicBlock, depth int) {
fmt.Println(strings.Repeat("\t", depth), "loop nesting forest root:", root.ID())
- for _, child := range root.loopNestingForestChildren {
+ for _, child := range root.loopNestingForestChildren.View() {
fmt.Println(strings.Repeat("\t", depth+1), "child:", child.ID())
if child.LoopHeader() {
printLoopNestingForest(child.(*basicBlock), depth+2)
@@ -202,10 +203,10 @@ func printLoopNestingForest(root *basicBlock, depth int) {
}
type dominatorSparseTree struct {
- time int
+ time int32
euler []*basicBlock
- first, depth []int
- table [][]int
+ first, depth []int32
+ table [][]int32
}
// passBuildDominatorTree builds the dominator tree for the function, and constructs builder.sparseTree.
@@ -232,11 +233,11 @@ func passBuildDominatorTree(b *builder) {
n := b.basicBlocksPool.Allocated()
st := &b.sparseTree
st.euler = append(st.euler[:0], make([]*basicBlock, 2*n-1)...)
- st.first = append(st.first[:0], make([]int, n)...)
+ st.first = append(st.first[:0], make([]int32, n)...)
for i := range st.first {
st.first[i] = -1
}
- st.depth = append(st.depth[:0], make([]int, 2*n-1)...)
+ st.depth = append(st.depth[:0], make([]int32, 2*n-1)...)
st.time = 0
// Start building the sparse tree.
@@ -244,9 +245,9 @@ func passBuildDominatorTree(b *builder) {
st.buildSparseTable()
}
-func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int) {
+func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int32) {
if wazevoapi.SSALoggingEnabled {
- fmt.Println(strings.Repeat("\t", height), "euler tour:", node.ID())
+ fmt.Println(strings.Repeat("\t", int(height)), "euler tour:", node.ID())
}
dt.euler[dt.time] = node
dt.depth[dt.time] = height
@@ -270,13 +271,13 @@ func (dt *dominatorSparseTree) buildSparseTable() {
table := dt.table
if n >= len(table) {
- table = append(table, make([][]int, n+1)...)
+ table = append(table, make([][]int32, n-len(table)+1)...)
}
for i := range table {
if len(table[i]) < k {
- table[i] = append(table[i], make([]int, k)...)
+ table[i] = append(table[i], make([]int32, k-len(table[i]))...)
}
- table[i][0] = i
+ table[i][0] = int32(i)
}
for j := 1; 1<<j <= n; j++ {
@@ -292,7 +293,7 @@ func (dt *dominatorSparseTree) buildSparseTable() {
}
// rmq performs a range minimum query on the sparse table.
-func (dt *dominatorSparseTree) rmq(l, r int) int {
+func (dt *dominatorSparseTree) rmq(l, r int32) int32 {
table := dt.table
depth := dt.depth
j := int(math.Log2(float64(r - l + 1)))
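The pass_cfg.go hunks above keep the Euler-tour depths and a sparse table for range-minimum queries, now with int32 indices. A compact sketch of a sparse-table RMQ over depths, which is the core of that Euler-tour LCA technique (illustrative code, not the wazero implementation):

```go
package main

import (
	"fmt"
	"math/bits"
)

// sparseTable answers range-minimum queries over an Euler-tour depth array.
type sparseTable struct {
	depth []int
	table [][]int // table[i][j] = index of the minimum depth in [i, i+2^j)
}

func build(depth []int) *sparseTable {
	n := len(depth)
	k := bits.Len(uint(n)) // number of levels needed
	t := make([][]int, n)
	for i := range t {
		t[i] = make([]int, k)
		t[i][0] = i
	}
	for j := 1; 1<<j <= n; j++ {
		for i := 0; i+(1<<j) <= n; i++ {
			a, b := t[i][j-1], t[i+(1<<(j-1))][j-1]
			if depth[a] <= depth[b] {
				t[i][j] = a
			} else {
				t[i][j] = b
			}
		}
	}
	return &sparseTable{depth: depth, table: t}
}

// rmq returns the index of the minimum depth in the inclusive range [l, r].
func (s *sparseTable) rmq(l, r int) int {
	j := bits.Len(uint(r-l+1)) - 1
	a, b := s.table[l][j], s.table[r-(1<<j)+1][j]
	if s.depth[a] <= s.depth[b] {
		return a
	}
	return b
}

func main() {
	st := build([]int{0, 1, 2, 1, 0, 1, 2})
	fmt.Println(st.rmq(1, 5)) // 4: the shallowest position between indices 1 and 5
}
```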
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go
index e8c8cd9de..73daf4269 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go
@@ -21,6 +21,9 @@ const (
// TypeV128 represents 128-bit SIMD vectors.
TypeV128
+
+ // -- Do not add new types after this line. ----
+ typeEnd
)
// String implements fmt.Stringer.
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go
index 3149fdc9e..313e34f9a 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go
@@ -69,7 +69,7 @@ type IDedPool[T any] struct {
// NewIDedPool returns a new IDedPool.
func NewIDedPool[T any](resetFn func(*T)) IDedPool[T] {
- return IDedPool[T]{pool: NewPool[T](resetFn)}
+ return IDedPool[T]{pool: NewPool[T](resetFn), maxIDEncountered: -1}
}
// GetOrAllocate returns the T with the given id.
@@ -97,7 +97,7 @@ func (p *IDedPool[T]) Get(id int) *T {
// Reset resets the pool.
func (p *IDedPool[T]) Reset() {
p.pool.Reset()
- for i := range p.idToItems {
+ for i := 0; i <= p.maxIDEncountered; i++ {
p.idToItems[i] = nil
}
p.maxIDEncountered = -1
diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go
index 25d7d3fdc..0dc6ec19c 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go
@@ -6,6 +6,9 @@ type CpuFeatureFlags interface {
Has(cpuFeature CpuFeature) bool
// HasExtra returns true when the specified extraFlag (represented as uint64) is supported
HasExtra(cpuFeature CpuFeature) bool
+ // Raw returns the raw bitset that represents CPU features used by wazero. This can be used for cache keying.
+ // For now, we only use four features, so uint64 is enough.
+ Raw() uint64
}
type CpuFeature uint64
@@ -17,9 +20,11 @@ const (
CpuFeatureAmd64SSE4_1 CpuFeature = 1 << 19
// CpuFeatureAmd64SSE4_2 is the flag to query CpuFeatureFlags.Has for SSEv4.2 capabilities on amd64
CpuFeatureAmd64SSE4_2 CpuFeature = 1 << 20
+ // Note: when adding new features, ensure that the feature is included in CpuFeatureFlags.Raw.
)
const (
// CpuExtraFeatureAmd64ABM is the flag to query CpuFeatureFlags.HasExtra for Advanced Bit Manipulation capabilities (e.g. LZCNT) on amd64
CpuExtraFeatureAmd64ABM CpuFeature = 1 << 5
+ // Note: when adding new features, ensure that the feature is included in CpuFeatureFlags.Raw.
)
diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go
index 8c9f1a9f3..fbdb53936 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go
@@ -2,10 +2,10 @@
package platform
-// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods
-var CpuFeatures CpuFeatureFlags = loadCpuFeatureFlags()
+// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods.
+var CpuFeatures = loadCpuFeatureFlags()
-// cpuFeatureFlags implements CpuFeatureFlags interface
+// cpuFeatureFlags implements CpuFeatureFlags interface.
type cpuFeatureFlags struct {
flags uint64
extraFlags uint64
@@ -15,13 +15,13 @@ type cpuFeatureFlags struct {
// implemented in impl_amd64.s
func cpuid(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32)
-// cpuidAsBitmap combines the result of invoking cpuid to uint64 bitmap
+// cpuidAsBitmap combines the result of invoking cpuid to uint64 bitmap.
func cpuidAsBitmap(arg1, arg2 uint32) uint64 {
_ /* eax */, _ /* ebx */, ecx, edx := cpuid(arg1, arg2)
return (uint64(edx) << 32) | uint64(ecx)
}
-// loadStandardRange load flags from the standard range, panics otherwise
+// loadStandardRange loads flags from the standard range, panics otherwise.
func loadStandardRange(id uint32) uint64 {
// ensure that the id is in the valid range, returned by cpuid(0,0)
maxRange, _, _, _ := cpuid(0, 0)
@@ -31,7 +31,7 @@ func loadStandardRange(id uint32) uint64 {
return cpuidAsBitmap(id, 0)
}
-// loadStandardRange load flags from the extended range, panics otherwise
+// loadExtendedRange loads flags from the extended range, panics otherwise.
func loadExtendedRange(id uint32) uint64 {
// ensure that the id is in the valid range, returned by cpuid(0x80000000,0)
maxRange, _, _, _ := cpuid(0x80000000, 0)
@@ -48,12 +48,32 @@ func loadCpuFeatureFlags() CpuFeatureFlags {
}
}
-// Has implements the same method on the CpuFeatureFlags interface
+// Has implements the same method on the CpuFeatureFlags interface.
func (f *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool {
return (f.flags & uint64(cpuFeature)) != 0
}
-// HasExtra implements the same method on the CpuFeatureFlags interface
+// HasExtra implements the same method on the CpuFeatureFlags interface.
func (f *cpuFeatureFlags) HasExtra(cpuFeature CpuFeature) bool {
return (f.extraFlags & uint64(cpuFeature)) != 0
}
+
+// Raw implements the same method on the CpuFeatureFlags interface.
+func (f *cpuFeatureFlags) Raw() uint64 {
+ // Below, we only set the first 4 bits for the features we care about,
+ // instead of setting all the unnecessary bits obtained from the CPUID instruction.
+ var ret uint64
+ if f.Has(CpuFeatureAmd64SSE3) {
+ ret = 1 << 0
+ }
+ if f.Has(CpuFeatureAmd64SSE4_1) {
+ ret |= 1 << 1
+ }
+ if f.Has(CpuFeatureAmd64SSE4_2) {
+ ret |= 1 << 2
+ }
+ if f.HasExtra(CpuExtraFeatureAmd64ABM) {
+ ret |= 1 << 3
+ }
+ return ret
+}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go
index 8ae826d36..291bcea65 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go
@@ -4,11 +4,14 @@ package platform
var CpuFeatures CpuFeatureFlags = &cpuFeatureFlags{}
-// cpuFeatureFlags implements CpuFeatureFlags for unsupported platforms
+// cpuFeatureFlags implements CpuFeatureFlags for unsupported platforms.
type cpuFeatureFlags struct{}
-// Has implements the same method on the CpuFeatureFlags interface
+// Has implements the same method on the CpuFeatureFlags interface.
func (c *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool { return false }
-// HasExtra implements the same method on the CpuFeatureFlags interface
+// HasExtra implements the same method on the CpuFeatureFlags interface.
func (c *cpuFeatureFlags) HasExtra(cpuFeature CpuFeature) bool { return false }
+
+// Raw implements the same method on the CpuFeatureFlags interface.
+func (c *cpuFeatureFlags) Raw() uint64 { return 0 }
diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go
index a61996d58..b0519003b 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go
@@ -12,8 +12,6 @@ const (
mmapProtARM64 = syscall.PROT_READ | syscall.PROT_WRITE
)
-const MmapSupported = true
-
func munmapCodeSegment(code []byte) error {
return syscall.Munmap(code)
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go
index 27833db37..079aa643f 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go
@@ -9,8 +9,6 @@ import (
var errUnsupported = fmt.Errorf("mmap unsupported on GOOS=%s. Use interpreter instead.", runtime.GOOS)
-const MmapSupported = false
-
func munmapCodeSegment(code []byte) error {
panic(errUnsupported)
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go
index 69fcb6d6b..03a254d4a 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go
@@ -21,8 +21,6 @@ const (
windows_PAGE_EXECUTE_READWRITE uintptr = 0x00000040
)
-const MmapSupported = true
-
func munmapCodeSegment(code []byte) error {
return freeMemory(code)
}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go
index 9a77205bb..fdbf1fde0 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go
@@ -38,9 +38,6 @@ func NewStdioFile(stdin bool, f fs.File) (fsapi.File, error) {
}
func OpenFile(path string, flag experimentalsys.Oflag, perm fs.FileMode) (*os.File, experimentalsys.Errno) {
- if flag&experimentalsys.O_DIRECTORY != 0 && flag&(experimentalsys.O_WRONLY|experimentalsys.O_RDWR) != 0 {
- return nil, experimentalsys.EISDIR // invalid to open a directory writeable
- }
return openFile(path, flag, perm)
}
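
The hunk above drops the EISDIR guard from the portable OpenFile wrapper. For reference, the dropped check only combined two flag tests; a standalone restatement with illustrative flag values (Oflag and the constants below merely stand in for the experimentalsys ones):

    package main

    import "fmt"

    // Oflag and the constants below stand in for experimentalsys.Oflag; the
    // exact values are illustrative, only the bit test matters.
    type Oflag uint32

    const (
    	O_RDWR      Oflag = 1 << 0
    	O_WRONLY    Oflag = 1 << 1
    	O_DIRECTORY Oflag = 1 << 2
    )

    // wantsWritableDirectory restates the removed guard: opening a directory
    // with any write access was rejected with EISDIR.
    func wantsWritableDirectory(flag Oflag) bool {
    	return flag&O_DIRECTORY != 0 && flag&(O_WRONLY|O_RDWR) != 0
    }

    func main() {
    	fmt.Println(wantsWritableDirectory(O_DIRECTORY | O_WRONLY)) // true
    	fmt.Println(wantsWritableDirectory(O_DIRECTORY))            // false
    }
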
diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go
index 58a458217..61a342ef2 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go
@@ -69,4 +69,7 @@ type ModuleEngine interface {
// FunctionInstanceReference returns Reference for the given Index for a FunctionInstance. The returned values are used by
// the initialization via ElementSegment.
FunctionInstanceReference(funcIndex Index) Reference
+
+ // MemoryGrown notifies the engine that the memory has grown.
+ MemoryGrown()
}
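
The new MemoryGrown hook lets the owning engine refresh whatever it caches about the linear memory once Grow may have reallocated the buffer (see the memory.go hunks below). A minimal sketch of that pattern; the types and field names are hypothetical, not wazero's actual engine:

    package main

    import "fmt"

    // memoryInstance is a pared-down stand-in for the wasm memory instance.
    type memoryInstance struct{ Buffer []byte }

    // engineStub caches a view of the buffer the way a compiled-code engine might.
    type engineStub struct {
    	mem        *memoryInstance
    	cachedBase []byte // what generated code would read through
    }

    // MemoryGrown has the same shape as the new ModuleEngine method: no
    // arguments, the engine simply re-reads the instance it already owns.
    func (e *engineStub) MemoryGrown() {
    	e.cachedBase = e.mem.Buffer
    }

    func main() {
    	mem := &memoryInstance{Buffer: make([]byte, 4)}
    	eng := &engineStub{mem: mem, cachedBase: mem.Buffer}

    	// Growing may reallocate the buffer; without the notification the
    	// engine's cached slice would silently go stale.
    	mem.Buffer = append(mem.Buffer, make([]byte, 4)...)
    	eng.MemoryGrown()
    	fmt.Println(len(eng.cachedBase)) // 8
    }
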
diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go
index 8da689076..ce2c7254d 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go
@@ -67,11 +67,6 @@ func (m *Module) validateFunctionWithMaxStackValues(
declaredFunctionIndexes map[Index]struct{},
br *bytes.Reader,
) error {
- nonStaticLocals := make(map[Index]struct{})
- if len(m.NonStaticLocals) > 0 {
- m.NonStaticLocals[idx] = nonStaticLocals
- }
-
functionType := &m.TypeSection[m.FunctionSection[idx]]
code := &m.CodeSection[idx]
body := code.Body
@@ -357,7 +352,6 @@ func (m *Module) validateFunctionWithMaxStackValues(
return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))",
OpcodeLocalSetName, index, l)
}
- nonStaticLocals[index] = struct{}{}
var expType ValueType
if index < inputLen {
expType = functionType.Params[index]
@@ -373,7 +367,6 @@ func (m *Module) validateFunctionWithMaxStackValues(
return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))",
OpcodeLocalTeeName, index, l)
}
- nonStaticLocals[index] = struct{}{}
var expType ValueType
if index < inputLen {
expType = functionType.Params[index]
diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go
index 5cc5012da..947b16112 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go
@@ -59,11 +59,14 @@ type MemoryInstance struct {
// with a fixed weight of 1 and no spurious notifications.
waiters sync.Map
+ // ownerModuleEngine is the module engine that owns this memory instance.
+ ownerModuleEngine ModuleEngine
+
expBuffer experimental.LinearMemory
}
// NewMemoryInstance creates a new instance based on the parameters in the SectionIDMemory.
-func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator) *MemoryInstance {
+func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator, moduleEngine ModuleEngine) *MemoryInstance {
minBytes := MemoryPagesToBytesNum(memSec.Min)
capBytes := MemoryPagesToBytesNum(memSec.Cap)
maxBytes := MemoryPagesToBytesNum(memSec.Max)
@@ -89,12 +92,13 @@ func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator) *
buffer = make([]byte, minBytes, capBytes)
}
return &MemoryInstance{
- Buffer: buffer,
- Min: memSec.Min,
- Cap: memoryBytesNumToPages(uint64(cap(buffer))),
- Max: memSec.Max,
- Shared: memSec.IsShared,
- expBuffer: expBuffer,
+ Buffer: buffer,
+ Min: memSec.Min,
+ Cap: memoryBytesNumToPages(uint64(cap(buffer))),
+ Max: memSec.Max,
+ Shared: memSec.IsShared,
+ expBuffer: expBuffer,
+ ownerModuleEngine: moduleEngine,
}
}
@@ -247,14 +251,12 @@ func (m *MemoryInstance) Grow(delta uint32) (result uint32, ok bool) {
m.Buffer = buffer
m.Cap = newPages
}
- return currentPages, true
} else if newPages > m.Cap { // grow the memory.
if m.Shared {
panic("shared memory cannot be grown, this is a bug in wazero")
}
m.Buffer = append(m.Buffer, make([]byte, MemoryPagesToBytesNum(delta))...)
m.Cap = newPages
- return currentPages, true
} else { // We already have the capacity we need.
if m.Shared {
// We assume grow is called under a guest lock.
@@ -264,8 +266,9 @@ func (m *MemoryInstance) Grow(delta uint32) (result uint32, ok bool) {
} else {
m.Buffer = m.Buffer[:MemoryPagesToBytesNum(newPages)]
}
- return currentPages, true
}
+ m.ownerModuleEngine.MemoryGrown()
+ return currentPages, true
}
// Pages implements the same method as documented on api.Memory.
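
The Grow rewrite above removes the early returns so that every growth path (experimental allocator, append-based reallocation, and growth within existing capacity) falls through to a single MemoryGrown notification followed by one return. A simplified restatement of that control flow; the types, field names, and the omission of the Max/shared-memory handling are all simplifications, not wazero's code:

    package wasmsketch

    const pageSize = 65536

    type engine interface{ MemoryGrown() }

    // memoryInstance is a pared-down stand-in for wasm.MemoryInstance; cap is
    // assumed to mirror cap(buffer) in pages.
    type memoryInstance struct {
    	buffer []byte
    	pages  uint32
    	cap    uint32
    	owner  engine
    }

    // grow restates the patched Grow: whichever branch grows the memory, the
    // owner engine is notified exactly once before the previous page count is
    // returned.
    func (m *memoryInstance) grow(delta uint32) (previousPages uint32, ok bool) {
    	previousPages = m.pages
    	newPages := previousPages + delta
    	if newPages > m.cap { // not enough capacity: reallocate by appending.
    		m.buffer = append(m.buffer, make([]byte, int(delta)*pageSize)...)
    		m.cap = newPages
    	} else { // capacity already available: extend the length only.
    		m.buffer = m.buffer[:int(newPages)*pageSize]
    	}
    	m.pages = newPages
    	m.owner.MemoryGrown() // single notification point for all branches.
    	return previousPages, true
    }
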
diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go
index 68573b918..8369ad9ed 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go
@@ -185,9 +185,6 @@ type Module struct {
// as described in https://yurydelendik.github.io/webassembly-dwarf/, though it is not specified in the Wasm
// specification: https://github.com/WebAssembly/debugging/issues/1
DWARFLines *wasmdebug.DWARFLines
-
- // NonStaticLocals collects the local indexes that will change its value through either local.get or local.tee.
- NonStaticLocals []map[Index]struct{}
}
// ModuleID represents sha256 hash value uniquely assigned to Module.
@@ -366,8 +363,6 @@ func (m *Module) validateFunctions(enabledFeatures api.CoreFeatures, functions [
br := bytes.NewReader(nil)
// Also, we reuse the stacks across multiple function validations to reduce allocations.
vs := &stacks{}
- // Non-static locals are gathered during validation and used in the down-stream compilation.
- m.NonStaticLocals = make([]map[Index]struct{}, len(m.FunctionSection))
for idx, typeIndex := range m.FunctionSection {
if typeIndex >= typeCount {
return fmt.Errorf("invalid %s: type section index %d out of range", m.funcDesc(SectionIDFunction, Index(idx)), typeIndex)
@@ -655,7 +650,7 @@ func paramNames(localNames IndirectNameMap, funcIdx uint32, paramLen int) []stri
func (m *ModuleInstance) buildMemory(module *Module, allocator experimental.MemoryAllocator) {
memSec := module.MemorySection
if memSec != nil {
- m.MemoryInstance = NewMemoryInstance(memSec, allocator)
+ m.MemoryInstance = NewMemoryInstance(memSec, allocator, m.Engine)
m.MemoryInstance.definition = &module.MemoryDefinitionSection[0]
}
}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index d0fd99a6c..201dcdd5c 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -517,7 +517,7 @@ github.com/modern-go/concurrent
# github.com/modern-go/reflect2 v1.0.2
## explicit; go 1.12
github.com/modern-go/reflect2
-# github.com/ncruces/go-sqlite3 v0.16.1
+# github.com/ncruces/go-sqlite3 v0.16.2
## explicit; go 1.21
github.com/ncruces/go-sqlite3
github.com/ncruces/go-sqlite3/driver
@@ -833,7 +833,7 @@ github.com/tdewolff/parse/v2/strconv
# github.com/technologize/otel-go-contrib v1.1.1
## explicit; go 1.17
github.com/technologize/otel-go-contrib/otelginmetrics
-# github.com/tetratelabs/wazero v1.7.2
+# github.com/tetratelabs/wazero v1.7.3
## explicit; go 1.20
github.com/tetratelabs/wazero
github.com/tetratelabs/wazero/api