55 files changed, 1075 insertions, 1040 deletions
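Before the diff body: a minimal sketch, not part of the commit, illustrating the sqlite3.JSON helper whose NULL handling several go-sqlite3 hunks below touch (NULL columns are now handed to the decoder as the literal bytes []byte("null")). The driver and embed import paths and the ":memory:" DSN follow the upstream README and are assumptions here, not taken from this diff.

package main

import (
	"database/sql"
	"fmt"
	"log"

	"github.com/ncruces/go-sqlite3"
	_ "github.com/ncruces/go-sqlite3/driver" // database/sql driver, registered as "sqlite3" (assumed)
	_ "github.com/ncruces/go-sqlite3/embed"  // embedded SQLite Wasm build (assumed)
)

func main() {
	db, err := sql.Open("sqlite3", ":memory:")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	var v any
	// The column is NULL, so the changed code paths below feed "null" to the JSON decoder,
	// leaving v as nil after the scan.
	if err := db.QueryRow(`SELECT NULL`).Scan(sqlite3.JSON(&v)); err != nil {
		log.Fatal(err)
	}
	fmt.Println(v == nil) // true
}

Per its doc comment (also updated in this diff), sqlite3.JSON is meant for database/sql calls like the Scan above and should not be combined with Stmt.BindJSON, Stmt.ColumnJSON, Value.JSON, or Context.ResultJSON.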
@@ -44,7 +44,7 @@ require (  	github.com/miekg/dns v1.1.59  	github.com/minio/minio-go/v7 v7.0.71  	github.com/mitchellh/mapstructure v1.5.0 -	github.com/ncruces/go-sqlite3 v0.16.1 +	github.com/ncruces/go-sqlite3 v0.16.2  	github.com/oklog/ulid v1.3.1  	github.com/prometheus/client_golang v1.19.1  	github.com/spf13/cobra v1.8.0 @@ -199,7 +199,7 @@ require (  	github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe // indirect  	github.com/superseriousbusiness/go-png-image-structure/v2 v2.0.1-SSB // indirect  	github.com/tdewolff/parse/v2 v2.7.14 // indirect -	github.com/tetratelabs/wazero v1.7.2 // indirect +	github.com/tetratelabs/wazero v1.7.3 // indirect  	github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc // indirect  	github.com/toqueteos/webbrowser v1.2.0 // indirect  	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect @@ -445,8 +445,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G  github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=  github.com/moul/http2curl v1.0.0 h1:dRMWoAtb+ePxMlLkrCbAqh4TlPHXvoGUSQ323/9Zahs=  github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= -github.com/ncruces/go-sqlite3 v0.16.1 h1:1wHv7s8y+fWK44UIliotJ42ZV41A5T0sjIAqGmnMrkc= -github.com/ncruces/go-sqlite3 v0.16.1/go.mod h1:feFXbBcbLtxNk6XWG1ROt8MS9+E45yCW3G8o4ixIqZ8= +github.com/ncruces/go-sqlite3 v0.16.2 h1:HesVRr0BC6QSGSEQfEXOntFWS9wd4Z8ms4nJzfUv4Rg= +github.com/ncruces/go-sqlite3 v0.16.2/go.mod h1:wkUIvOrAjFQnefVlivJfcowKUcfMHs4mvLfhVanzHHI=  github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=  github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=  github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M= @@ -562,8 +562,8 @@ github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739 h1:IkjBCtQOOjIn03  github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739/go.mod h1:XPuWBzvdUzhCuxWO1ojpXsyzsA5bFoS3tO/Q3kFuTG8=  github.com/technologize/otel-go-contrib v1.1.1 h1:wZH9aSPNWZWIkEh3vfaKfMb15AJ80jJ1aVj/4GZdqIw=  github.com/technologize/otel-go-contrib v1.1.1/go.mod h1:dCN/wj2WyUO8aFZFdIN+6tfJHImjTML/8r2YVYAy3So= -github.com/tetratelabs/wazero v1.7.2 h1:1+z5nXJNwMLPAWaTePFi49SSTL0IMx/i3Fg8Yc25GDc= -github.com/tetratelabs/wazero v1.7.2/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y= +github.com/tetratelabs/wazero v1.7.3 h1:PBH5KVahrt3S2AHgEjKu4u+LlDbbk+nsGE3KLucy6Rw= +github.com/tetratelabs/wazero v1.7.3/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y=  github.com/tidwall/btree v0.0.0-20191029221954-400434d76274 h1:G6Z6HvJuPjG6XfNGi/feOATzeJrfgTNJY+rGrHbA04E=  github.com/tidwall/btree v0.0.0-20191029221954-400434d76274/go.mod h1:huei1BkDWJ3/sLXmO+bsCNELL+Bp2Kks9OLyQFkzvA8=  github.com/tidwall/buntdb v1.1.2 h1:noCrqQXL9EKMtcdwJcmuVKSEjqu1ua99RHHgbLTEHRo= diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go index c0ba38cf0..7f6849a42 100644 --- a/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go @@ -26,7 +26,7 @@ func (j JSON) Scan(value any) error {  		buf = v.AppendFormat(buf, time.RFC3339Nano)  		buf = append(buf, '"')  	case nil: -		buf = append(buf, "null"...) 
+		buf = []byte("null")  	default:  		panic(AssertErr())  	} diff --git a/vendor/github.com/ncruces/go-sqlite3/json.go b/vendor/github.com/ncruces/go-sqlite3/json.go index 9b2565e87..2b762c092 100644 --- a/vendor/github.com/ncruces/go-sqlite3/json.go +++ b/vendor/github.com/ncruces/go-sqlite3/json.go @@ -5,7 +5,8 @@ import "github.com/ncruces/go-sqlite3/internal/util"  // JSON returns a value that can be used as an argument to  // [database/sql.DB.Exec], [database/sql.Row.Scan] and similar methods to  // store value as JSON, or decode JSON into value. -// JSON should NOT be used with [BindJSON] or [ResultJSON]. +// JSON should NOT be used with [Stmt.BindJSON], [Stmt.ColumnJSON], +// [Value.JSON], or [Context.ResultJSON].  func JSON(value any) any {  	return util.JSON{Value: value}  } diff --git a/vendor/github.com/ncruces/go-sqlite3/pointer.go b/vendor/github.com/ncruces/go-sqlite3/pointer.go index 611c1528c..0e2418b99 100644 --- a/vendor/github.com/ncruces/go-sqlite3/pointer.go +++ b/vendor/github.com/ncruces/go-sqlite3/pointer.go @@ -4,7 +4,8 @@ import "github.com/ncruces/go-sqlite3/internal/util"  // Pointer returns a pointer to a value that can be used as an argument to  // [database/sql.DB.Exec] and similar methods. -// Pointer should NOT be used with [BindPointer] or [ResultPointer]. +// Pointer should NOT be used with [Stmt.BindPointer], +// [Value.Pointer], or [Context.ResultPointer].  //  // https://sqlite.org/bindptr.html  func Pointer[T any](value T) any { diff --git a/vendor/github.com/ncruces/go-sqlite3/stmt.go b/vendor/github.com/ncruces/go-sqlite3/stmt.go index ac40e3802..381a7d06b 100644 --- a/vendor/github.com/ncruces/go-sqlite3/stmt.go +++ b/vendor/github.com/ncruces/go-sqlite3/stmt.go @@ -564,7 +564,7 @@ func (s *Stmt) ColumnJSON(col int, ptr any) error {  	var data []byte  	switch s.ColumnType(col) {  	case NULL: -		data = append(data, "null"...) +		data = []byte("null")  	case TEXT:  		data = s.ColumnRawText(col)  	case BLOB: diff --git a/vendor/github.com/ncruces/go-sqlite3/value.go b/vendor/github.com/ncruces/go-sqlite3/value.go index d0edf215b..1894ff4f1 100644 --- a/vendor/github.com/ncruces/go-sqlite3/value.go +++ b/vendor/github.com/ncruces/go-sqlite3/value.go @@ -177,7 +177,7 @@ func (v Value) JSON(ptr any) error {  	var data []byte  	switch v.Type() {  	case NULL: -		data = append(data, "null"...) 
+		data = []byte("null")  	case TEXT:  		data = v.RawText()  	case BLOB: diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go index 8dc57ab9c..f21335d8e 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go @@ -75,11 +75,6 @@ func (memVFS) FullPathname(name string) (string, error) {  type memDB struct {  	name string -	// +checklocks:lockMtx -	pending *memFile -	// +checklocks:lockMtx -	reserved *memFile -  	// +checklocks:dataMtx  	data []*[sectorSize]byte @@ -88,6 +83,10 @@ type memDB struct {  	// +checklocks:lockMtx  	shared int +	// +checklocks:lockMtx +	reserved bool +	// +checklocks:lockMtx +	pending bool  	// +checklocks:memoryMtx  	refs int @@ -214,24 +213,24 @@ func (m *memFile) Lock(lock vfs.LockLevel) error {  	switch lock {  	case vfs.LOCK_SHARED: -		if m.pending != nil { +		if m.pending {  			return sqlite3.BUSY  		}  		m.shared++  	case vfs.LOCK_RESERVED: -		if m.reserved != nil { +		if m.reserved {  			return sqlite3.BUSY  		} -		m.reserved = m +		m.reserved = true  	case vfs.LOCK_EXCLUSIVE:  		if m.lock < vfs.LOCK_PENDING { -			if m.pending != nil { +			if m.pending {  				return sqlite3.BUSY  			}  			m.lock = vfs.LOCK_PENDING -			m.pending = m +			m.pending = true  		}  		for before := time.Now(); m.shared > 1; { @@ -256,11 +255,11 @@ func (m *memFile) Unlock(lock vfs.LockLevel) error {  	m.lockMtx.Lock()  	defer m.lockMtx.Unlock() -	if m.pending == m { -		m.pending = nil +	if m.pending && m.lock >= vfs.LOCK_PENDING { +		m.pending = false  	} -	if m.reserved == m { -		m.reserved = nil +	if m.reserved && m.lock >= vfs.LOCK_RESERVED { +		m.reserved = false  	}  	if lock < vfs.LOCK_SHARED {  		m.shared-- @@ -275,7 +274,7 @@ func (m *memFile) CheckReservedLock() (bool, error) {  	}  	m.lockMtx.Lock()  	defer m.lockMtx.Unlock() -	return m.reserved != nil, nil +	return m.reserved, nil  }  func (m *memFile) SectorSize() int { diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go b/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go index 58da34df4..7b0d4b677 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go @@ -125,6 +125,9 @@ func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, ext  		return 0, _IOERR_SHMMAP  	}  	s.regions = append(s.regions, r) +	if s.readOnly { +		return r.Ptr, _READONLY +	}  	return r.Ptr, _OK  } diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go index 3b45b3087..8c2abee81 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go @@ -101,13 +101,13 @@ func (s *vfsShm) shmOpen() (rc _ErrorCode) {  		return _OK  	} -	// Open file read-write, as it will be shared. +	// Always open file read-write, as it will be shared.  	f, err := os.OpenFile(s.path,  		unix.O_RDWR|unix.O_CREAT|unix.O_NOFOLLOW, 0666)  	if err != nil {  		return _CANTOPEN  	} -	// Close if file if it's not nil. +	// Closes file if it's not nil.  	defer func() { f.Close() }()  	fi, err := f.Stat() @@ -145,17 +145,14 @@ func (s *vfsShm) shmOpen() (rc _ErrorCode) {  		info: fi,  		refs: 1,  	} -	f = nil -	add := true +	f = nil // Don't close the file.  	
for i, g := range vfsShmFiles {  		if g == nil {  			vfsShmFiles[i] = s.vfsShmFile -			add = false +			return rc  		}  	} -	if add { -		vfsShmFiles = append(vfsShmFiles, s.vfsShmFile) -	} +	vfsShmFiles = append(vfsShmFiles, s.vfsShmFile)  	return rc  } @@ -195,6 +192,9 @@ func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, ext  		return 0, _IOERR_SHMMAP  	}  	s.regions = append(s.regions, r) +	if s.readOnly { +		return r.Ptr, _READONLY +	}  	return r.Ptr, _OK  } diff --git a/vendor/github.com/tetratelabs/wazero/config.go b/vendor/github.com/tetratelabs/wazero/config.go index 819a76df5..d3656849c 100644 --- a/vendor/github.com/tetratelabs/wazero/config.go +++ b/vendor/github.com/tetratelabs/wazero/config.go @@ -148,7 +148,7 @@ type RuntimeConfig interface {  	//	customSections := c.CustomSections()  	WithCustomSections(bool) RuntimeConfig -	// WithCloseOnContextDone ensures the executions of functions to be closed under one of the following circumstances: +	// WithCloseOnContextDone ensures the executions of functions to be terminated under one of the following circumstances:  	//  	// 	- context.Context passed to the Call method of api.Function is canceled during execution. (i.e. ctx by context.WithCancel)  	// 	- context.Context passed to the Call method of api.Function reaches timeout during execution. (i.e. ctx by context.WithTimeout or context.WithDeadline) @@ -159,6 +159,8 @@ type RuntimeConfig interface {  	// entire underlying OS thread which runs the api.Function call. See "Why it's safe to execute runtime-generated  	// machine codes against async Goroutine preemption" section in RATIONALE.md for detail.  	// +	// Upon the termination of the function executions, api.Module is closed. +	//  	// Note that this comes with a bit of extra cost when enabled. The reason is that internally this forces  	// interpreter and compiler runtimes to insert the periodical checks on the conditions above. For that reason,  	// this is disabled by default. @@ -217,9 +219,18 @@ const (  // part. wazero automatically performs ahead-of-time compilation as needed when  // Runtime.CompileModule is invoked.  // -// Warning: This panics at runtime if the runtime.GOOS or runtime.GOARCH does not -// support compiler. Use NewRuntimeConfig to safely detect and fallback to -// NewRuntimeConfigInterpreter if needed. +// # Warning +// +//   - This panics at runtime if the runtime.GOOS or runtime.GOARCH does not +//     support compiler. Use NewRuntimeConfig to safely detect and fallback to +//     NewRuntimeConfigInterpreter if needed. +// +//   - If you are using wazero in buildmode=c-archive or c-shared, make sure that you set up the alternate signal stack +//     by using, e.g. `sigaltstack` combined with `SA_ONSTACK` flag on `sigaction` on Linux, +//     before calling any api.Function. This is because the Go runtime does not set up the alternate signal stack +//     for c-archive or c-shared modes, and wazero uses the different stack than the calling Goroutine. +//     Hence, the signal handler might get invoked on the wazero's stack, which may cause a stack overflow. 
+//     https://github.com/tetratelabs/wazero/blob/2092c0a879f30d49d7b37f333f4547574b8afe0d/internal/integration_test/fuzz/fuzz/tests/sigstack.rs#L19-L36  func NewRuntimeConfigCompiler() RuntimeConfig {  	ret := engineLessConfig.clone()  	ret.engineKind = engineKindCompiler diff --git a/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go index 443c5a294..c75db615e 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go @@ -21,13 +21,6 @@ type Snapshotter interface {  	Snapshot() Snapshot  } -// EnableSnapshotterKey is a context key to indicate that snapshotting should be enabled. -// The context.Context passed to a exported function invocation should have this key set -// to a non-nil value, and host functions will be able to retrieve it using SnapshotterKey. -// -// Deprecated: use WithSnapshotter to enable snapshots. -type EnableSnapshotterKey = expctxkeys.EnableSnapshotterKey -  // WithSnapshotter enables snapshots.  // Passing the returned context to a exported function invocation enables snapshots,  // and allows host functions to retrieve the Snapshotter using GetSnapshotter. @@ -35,12 +28,6 @@ func WithSnapshotter(ctx context.Context) context.Context {  	return context.WithValue(ctx, expctxkeys.EnableSnapshotterKey{}, struct{}{})  } -// SnapshotterKey is a context key to access a Snapshotter from a host function. -// It is only present if EnableSnapshotter was set in the function invocation context. -// -// Deprecated: use GetSnapshotter to get the snapshotter. -type SnapshotterKey = expctxkeys.SnapshotterKey -  // GetSnapshotter gets the Snapshotter from a host function.  // It is only present if WithSnapshotter was called with the function invocation context.  func GetSnapshotter(ctx context.Context) Snapshotter { diff --git a/vendor/github.com/tetratelabs/wazero/experimental/listener.go b/vendor/github.com/tetratelabs/wazero/experimental/listener.go index b2ba1fe83..55fc6b668 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/listener.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/listener.go @@ -24,12 +24,6 @@ type StackIterator interface {  	ProgramCounter() ProgramCounter  } -// FunctionListenerFactoryKey is a context.Context Value key. -// Its associated value should be a FunctionListenerFactory. -// -// Deprecated: use WithFunctionListenerFactory to enable snapshots. -type FunctionListenerFactoryKey = expctxkeys.FunctionListenerFactoryKey -  // WithFunctionListenerFactory registers a FunctionListenerFactory  // with the context.  func WithFunctionListenerFactory(ctx context.Context, factory FunctionListenerFactory) context.Context { diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go index 761a1f9dc..5ebc1780f 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go @@ -23,6 +23,10 @@ const (  	// instead of syscall.ENOTDIR  	_ERROR_DIRECTORY = syscall.Errno(0x10B) +	// _ERROR_NOT_A_REPARSE_POINT is a Windows error returned by os.Readlink +	// instead of syscall.EINVAL +	_ERROR_NOT_A_REPARSE_POINT = syscall.Errno(0x1126) +  	// _ERROR_INVALID_SOCKET is a Windows error returned by winsock_select  	// when a given handle is not a socket.  	
_ERROR_INVALID_SOCKET = syscall.Errno(0x2736) @@ -51,7 +55,7 @@ func errorToErrno(err error) Errno {  			return EBADF  		case syscall.ERROR_PRIVILEGE_NOT_HELD:  			return EPERM -		case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME: +		case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME, _ERROR_NOT_A_REPARSE_POINT:  			return EINVAL  		}  		errno, _ := syscallToErrno(err) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go index a89ddc457..18c5f4252 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go @@ -98,6 +98,9 @@ func (e *moduleEngine) SetGlobalValue(idx wasm.Index, lo, hi uint64) {  // OwnsGlobals implements the same method as documented on wasm.ModuleEngine.  func (e *moduleEngine) OwnsGlobals() bool { return false } +// MemoryGrown implements wasm.ModuleEngine. +func (e *moduleEngine) MemoryGrown() {} +  // callEngine holds context per moduleEngine.Call, and shared across all the  // function calls originating from the same moduleEngine.Call execution.  // diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go index 81c6a6b62..8e9571b20 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go @@ -43,7 +43,7 @@ type ExecutableContextT[Instr any] struct {  	labelPositionPool wazevoapi.Pool[LabelPosition[Instr]]  	NextLabel         Label  	// LabelPositions maps a label to the instructions of the region which the label represents. -	LabelPositions     map[Label]*LabelPosition[Instr] +	LabelPositions     []*LabelPosition[Instr]  	OrderedBlockLabels []*LabelPosition[Instr]  	// PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. 
@@ -67,7 +67,6 @@ func NewExecutableContextT[Instr any](  		setNext:           setNext,  		setPrev:           setPrev,  		labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]), -		LabelPositions:    make(map[Label]*LabelPosition[Instr]),  		NextLabel:         LabelInvalid,  	}  } @@ -97,11 +96,7 @@ func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) {  	end := e.allocateNop0()  	e.PerBlockHead, e.PerBlockEnd = end, end -	labelPos, ok := e.LabelPositions[l] -	if !ok { -		labelPos = e.AllocateLabelPosition(l) -		e.LabelPositions[l] = labelPos -	} +	labelPos := e.GetOrAllocateLabelPosition(l)  	e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos)  	labelPos.Begin, labelPos.End = end, end  	labelPos.SB = blk @@ -146,8 +141,8 @@ func (e *ExecutableContextT[T]) FlushPendingInstructions() {  func (e *ExecutableContextT[T]) Reset() {  	e.labelPositionPool.Reset()  	e.InstructionPool.Reset() -	for l := Label(0); l <= e.NextLabel; l++ { -		delete(e.LabelPositions, l) +	for i := range e.LabelPositions { +		e.LabelPositions[i] = nil  	}  	e.PendingInstructions = e.PendingInstructions[:0]  	e.OrderedBlockLabels = e.OrderedBlockLabels[:0] @@ -163,10 +158,17 @@ func (e *ExecutableContextT[T]) AllocateLabel() Label {  	return e.NextLabel  } -func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] { -	l := e.labelPositionPool.Allocate() -	l.L = la -	return l +func (e *ExecutableContextT[T]) GetOrAllocateLabelPosition(l Label) *LabelPosition[T] { +	if len(e.LabelPositions) <= int(l) { +		e.LabelPositions = append(e.LabelPositions, make([]*LabelPosition[T], int(l)+1-len(e.LabelPositions))...) +	} +	ret := e.LabelPositions[l] +	if ret == nil { +		ret = e.labelPositionPool.Allocate() +		ret.L = l +		e.LabelPositions[l] = ret +	} +	return ret  }  func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go index 310ad2203..61ae6f406 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go @@ -1906,8 +1906,10 @@ func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) {  func (m *machine) Format() string {  	ectx := m.ectx  	begins := map[*instruction]backend.Label{} -	for l, pos := range ectx.LabelPositions { -		begins[pos.Begin] = l +	for _, pos := range ectx.LabelPositions { +		if pos != nil { +			begins[pos.Begin] = pos.L +		}  	}  	irBlocks := map[backend.Label]ssa.BasicBlockID{} @@ -1950,7 +1952,10 @@ func (m *machine) encodeWithoutSSA(root *instruction) {  		offset := int64(len(*bufPtr))  		if cur.kind == nop0 {  			l := cur.nop0Label() -			if pos, ok := ectx.LabelPositions[l]; ok { +			if int(l) >= len(ectx.LabelPositions) { +				continue +			} +			if pos := ectx.LabelPositions[l]; pos != nil {  				pos.BinaryOffset = offset  			}  		} @@ -2005,7 +2010,7 @@ func (m *machine) Encode(ctx context.Context) (err error) {  			switch cur.kind {  			case nop0:  				l := cur.nop0Label() -				if pos, ok := ectx.LabelPositions[l]; ok { +				if pos := ectx.LabelPositions[l]; pos != nil {  					pos.BinaryOffset = offset  				}  			case sourceOffsetInfo: @@ -2165,8 +2170,7 @@ func (m *machine) allocateBrTarget() (nop *instruction, l backend.Label) { //nol  func (m *machine) 
allocateLabel() *labelPosition {  	ectx := m.ectx  	l := ectx.AllocateLabel() -	pos := ectx.AllocateLabelPosition(l) -	ectx.LabelPositions[l] = pos +	pos := ectx.GetOrAllocateLabelPosition(l)  	return pos  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go index 6615471c6..4eaa13ce1 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go @@ -101,13 +101,14 @@ func (m *machine) LowerParams(args []ssa.Value) {  			bits := arg.Type.Bits()  			// At this point of compilation, we don't yet know how much space exist below the return address.  			// So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation. -			amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace} +			amode := m.amodePool.Allocate() +			*amode = addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace}  			load := m.allocateInstr()  			switch arg.Type {  			case ssa.TypeI32, ssa.TypeI64: -				load.asULoad(operandNR(reg), amode, bits) +				load.asULoad(reg, amode, bits)  			case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: -				load.asFpuLoad(operandNR(reg), amode, bits) +				load.asFpuLoad(reg, amode, bits)  			default:  				panic("BUG")  			} @@ -169,7 +170,8 @@ func (m *machine) LowerReturns(rets []ssa.Value) {  			// At this point of compilation, we don't yet know how much space exist below the return address.  			// So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation. -			amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace} +			amode := m.amodePool.Allocate() +			*amode = addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace}  			store := m.allocateInstr()  			store.asStore(operandNR(reg), amode, bits)  			m.insert(store) @@ -215,9 +217,9 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i  		ldr := m.allocateInstr()  		switch r.Type {  		case ssa.TypeI32, ssa.TypeI64: -			ldr.asULoad(operandNR(reg), amode, r.Type.Bits()) +			ldr.asULoad(reg, amode, r.Type.Bits())  		case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: -			ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits()) +			ldr.asFpuLoad(reg, amode, r.Type.Bits())  		default:  			panic("BUG")  		} @@ -225,7 +227,7 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i  	}  } -func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) { +func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) {  	exct := m.executableContext  	exct.PendingInstructions = exct.PendingInstructions[:0]  	mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse) @@ -235,15 +237,15 @@ func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset  	return cur, mode  } -func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode { +func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) *addressMode {  	if 
rn.RegType() != regalloc.RegTypeInt {  		panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64))  	} -	var amode addressMode +	amode := m.amodePool.Allocate()  	if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) { -		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset} +		*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset}  	} else if offsetFitsInAddressModeKindRegSignedImm9(offset) { -		amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset} +		*amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset}  	} else {  		var indexReg regalloc.VReg  		if allowTmpRegUse { @@ -253,7 +255,7 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg  			indexReg = m.compiler.AllocateVReg(ssa.TypeI64)  			m.lowerConstantI64(indexReg, offset)  		} -		amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} +		*amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */}  	}  	return amode  } @@ -315,7 +317,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b  		} else {  			ao = aluOpSub  		} -		alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true) +		alu.asALU(ao, rd, operandNR(spVReg), imm12Operand, true)  		m.insert(alu)  	} else {  		m.lowerConstantI64(tmpRegVReg, diff) @@ -326,7 +328,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b  		} else {  			ao = aluOpSub  		} -		alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true) +		alu.asALU(ao, rd, operandNR(spVReg), operandNR(tmpRegVReg), true)  		m.insert(alu)  	}  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go index 7a9cceb33..f8b5d97ac 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go @@ -59,25 +59,26 @@ func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regallo  	} else {  		postIndexImm = 8  	} -	loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm} +	loadMode := m.amodePool.Allocate() +	*loadMode = addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm}  	instr := m.allocateInstr()  	switch typ {  	case ssa.TypeI32: -		instr.asULoad(loadTargetReg, loadMode, 32) +		instr.asULoad(loadTargetReg.reg(), loadMode, 32)  	case ssa.TypeI64: -		instr.asULoad(loadTargetReg, loadMode, 64) +		instr.asULoad(loadTargetReg.reg(), loadMode, 64)  	case ssa.TypeF32: -		instr.asFpuLoad(loadTargetReg, loadMode, 32) +		instr.asFpuLoad(loadTargetReg.reg(), loadMode, 32)  	case ssa.TypeF64: -		instr.asFpuLoad(loadTargetReg, loadMode, 64) +		instr.asFpuLoad(loadTargetReg.reg(), loadMode, 64)  	case ssa.TypeV128: -		instr.asFpuLoad(loadTargetReg, loadMode, 128) +		instr.asFpuLoad(loadTargetReg.reg(), loadMode, 128)  	}  	cur = linkInstr(cur, instr)  	if isStackArg { -		var storeMode addressMode +		var storeMode *addressMode  		cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true)  		toStack := m.allocateInstr()  		
toStack.asStore(loadTargetReg, storeMode, bits) @@ -113,21 +114,22 @@ func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr reg  	}  	if isStackArg { -		var loadMode addressMode +		var loadMode *addressMode  		cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true)  		toReg := m.allocateInstr()  		switch typ {  		case ssa.TypeI32, ssa.TypeI64: -			toReg.asULoad(storeTargetReg, loadMode, bits) +			toReg.asULoad(storeTargetReg.reg(), loadMode, bits)  		case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: -			toReg.asFpuLoad(storeTargetReg, loadMode, bits) +			toReg.asFpuLoad(storeTargetReg.reg(), loadMode, bits)  		default:  			panic("TODO?")  		}  		cur = linkInstr(cur, toReg)  	} -	mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm} +	mode := m.amodePool.Allocate() +	*mode = addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm}  	instr := m.allocateInstr()  	instr.asStore(storeTargetReg, mode, bits)  	cur = linkInstr(cur, instr) @@ -214,11 +216,12 @@ func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction  func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction {  	instr := m.allocateInstr() -	mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()} +	mode := m.amodePool.Allocate() +	*mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()}  	if store {  		instr.asStore(operandNR(d), mode, 64)  	} else { -		instr.asULoad(operandNR(d), mode, 64) +		instr.asULoad(d, mode, 64)  	}  	return linkInstr(prev, instr)  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go index 466b1f960..99e6bb482 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go @@ -87,7 +87,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  		// Module context is always the second argument.  		
moduleCtrPtr := x1VReg  		store := m.allocateInstr() -		amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset} +		amode := m.amodePool.Allocate() +		*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset}  		store.asStore(operandNR(moduleCtrPtr), amode, 64)  		cur = linkInstr(cur, store)  	} @@ -120,11 +121,9 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  		} else {  			sizeInBits = 64  		} -		store.asStore(operandNR(v), -			addressMode{ -				kind: addressModeKindPostIndex, -				rn:   arg0ret0AddrReg, imm: int64(sizeInBits / 8), -			}, sizeInBits) +		amode := m.amodePool.Allocate() +		*amode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8)} +		store.asStore(operandNR(v), amode, sizeInBits)  		cur = linkInstr(cur, store)  	} @@ -139,7 +138,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  		frameSizeReg = xzrVReg  		sliceSizeReg = xzrVReg  	} -	_amode := addressModePreOrPostIndex(spVReg, -16, true) +	_amode := addressModePreOrPostIndex(m, spVReg, -16, true)  	storeP := m.allocateInstr()  	storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode)  	cur = linkInstr(cur, storeP) @@ -165,8 +164,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  	cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true)  	ldr := m.allocateInstr()  	// And load the return address. -	ldr.asULoad(operandNR(lrVReg), -		addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) +	amode := addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */) +	ldr.asULoad(lrVReg, amode, 64)  	cur = linkInstr(cur, ldr)  	originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want. @@ -183,23 +182,24 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  		r := &abi.Rets[i]  		if r.Kind == backend.ABIArgKindReg {  			loadIntoReg := m.allocateInstr() -			mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} +			mode := m.amodePool.Allocate() +			*mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}  			switch r.Type {  			case ssa.TypeI32:  				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. -				loadIntoReg.asULoad(operandNR(r.Reg), mode, 32) +				loadIntoReg.asULoad(r.Reg, mode, 32)  			case ssa.TypeI64:  				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. -				loadIntoReg.asULoad(operandNR(r.Reg), mode, 64) +				loadIntoReg.asULoad(r.Reg, mode, 64)  			case ssa.TypeF32:  				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. -				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32) +				loadIntoReg.asFpuLoad(r.Reg, mode, 32)  			case ssa.TypeF64:  				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. -				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64) +				loadIntoReg.asFpuLoad(r.Reg, mode, 64)  			case ssa.TypeV128:  				mode.imm = 16 -				loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128) +				loadIntoReg.asFpuLoad(r.Reg, mode, 128)  			default:  				panic("TODO")  			} @@ -208,28 +208,29 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig *  			// First we need to load the value to a temporary just like ^^.  			
intTmp, floatTmp := x11VReg, v11VReg  			loadIntoTmpReg := m.allocateInstr() -			mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} +			mode := m.amodePool.Allocate() +			*mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg}  			var resultReg regalloc.VReg  			switch r.Type {  			case ssa.TypeI32:  				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. -				loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32) +				loadIntoTmpReg.asULoad(intTmp, mode, 32)  				resultReg = intTmp  			case ssa.TypeI64:  				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. -				loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64) +				loadIntoTmpReg.asULoad(intTmp, mode, 64)  				resultReg = intTmp  			case ssa.TypeF32:  				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. -				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32) +				loadIntoTmpReg.asFpuLoad(floatTmp, mode, 32)  				resultReg = floatTmp  			case ssa.TypeF64:  				mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. -				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64) +				loadIntoTmpReg.asFpuLoad(floatTmp, mode, 64)  				resultReg = floatTmp  			case ssa.TypeV128:  				mode.imm = 16 -				loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128) +				loadIntoTmpReg.asFpuLoad(floatTmp, mode, 128)  				resultReg = floatTmp  			default:  				panic("TODO") @@ -258,12 +259,13 @@ func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regal  		case regalloc.RegTypeFloat:  			sizeInBits = 128  		} -		store.asStore(operandNR(v), -			addressMode{ -				kind: addressModeKindRegUnsignedImm12, -				// Execution context is always the first argument. -				rn: x0VReg, imm: offset, -			}, sizeInBits) +		mode := m.amodePool.Allocate() +		*mode = addressMode{ +			kind: addressModeKindRegUnsignedImm12, +			// Execution context is always the first argument. +			rn: x0VReg, imm: offset, +		} +		store.asStore(operandNR(v), mode, sizeInBits)  		store.prev = cur  		cur.next = store  		cur = store @@ -276,7 +278,7 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re  	offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64()  	for _, v := range regs {  		load := m.allocateInstr() -		var as func(dst operand, amode addressMode, sizeInBits byte) +		var as func(dst regalloc.VReg, amode *addressMode, sizeInBits byte)  		var sizeInBits byte  		switch v.RegType() {  		case regalloc.RegTypeInt: @@ -286,12 +288,13 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re  			as = load.asFpuLoad  			sizeInBits = 128  		} -		as(operandNR(v), -			addressMode{ -				kind: addressModeKindRegUnsignedImm12, -				// Execution context is always the first argument. -				rn: x0VReg, imm: offset, -			}, sizeInBits) +		mode := m.amodePool.Allocate() +		*mode = addressMode{ +			kind: addressModeKindRegUnsignedImm12, +			// Execution context is always the first argument. +			rn: x0VReg, imm: offset, +		} +		as(v, mode, sizeInBits)  		cur = linkInstr(cur, load)  		offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16.  	} @@ -324,11 +327,9 @@ func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode  	// Set the exit status on the execution context.  	
setExistStatus := m.allocateInstr() -	setExistStatus.asStore(operandNR(constReg), -		addressMode{ -			kind: addressModeKindRegUnsignedImm12, -			rn:   execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), -		}, 32) +	mode := m.amodePool.Allocate() +	*mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64()} +	setExistStatus.asStore(operandNR(constReg), mode, 32)  	cur = linkInstr(cur, setExistStatus)  	return cur  } @@ -340,12 +341,13 @@ func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction {  	cur = linkInstr(cur, adr)  	storeReturnAddr := m.allocateInstr() -	storeReturnAddr.asStore(operandNR(tmpRegVReg), -		addressMode{ -			kind: addressModeKindRegUnsignedImm12, -			// Execution context is always the first argument. -			rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), -		}, 64) +	mode := m.amodePool.Allocate() +	*mode = addressMode{ +		kind: addressModeKindRegUnsignedImm12, +		// Execution context is always the first argument. +		rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), +	} +	storeReturnAddr.asStore(operandNR(tmpRegVReg), mode, 64)  	cur = linkInstr(cur, storeReturnAddr)  	// Exit the execution. @@ -364,11 +366,12 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe  	cur = linkInstr(cur, movSp)  	strSp := m.allocateInstr() -	strSp.asStore(operandNR(tmpRegVReg), -		addressMode{ -			kind: addressModeKindRegUnsignedImm12, -			rn:   execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), -		}, 64) +	mode := m.amodePool.Allocate() +	*mode = addressMode{ +		kind: addressModeKindRegUnsignedImm12, +		rn:   execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), +	} +	strSp.asStore(operandNR(tmpRegVReg), mode, 64)  	cur = linkInstr(cur, strSp)  	return cur  } @@ -376,27 +379,28 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe  func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) {  	load := m.allocateInstr()  	var result regalloc.VReg -	mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg} +	mode := m.amodePool.Allocate() +	*mode = addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg}  	switch arg.Type {  	case ssa.TypeI32:  		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. -		load.asULoad(operandNR(intVReg), mode, 32) +		load.asULoad(intVReg, mode, 32)  		result = intVReg  	case ssa.TypeI64:  		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. -		load.asULoad(operandNR(intVReg), mode, 64) +		load.asULoad(intVReg, mode, 64)  		result = intVReg  	case ssa.TypeF32:  		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. -		load.asFpuLoad(operandNR(floatVReg), mode, 32) +		load.asFpuLoad(floatVReg, mode, 32)  		result = floatVReg  	case ssa.TypeF64:  		mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. 
-		load.asFpuLoad(operandNR(floatVReg), mode, 64) +		load.asFpuLoad(floatVReg, mode, 64)  		result = floatVReg  	case ssa.TypeV128:  		mode.imm = 16 -		load.asFpuLoad(operandNR(floatVReg), mode, 128) +		load.asFpuLoad(floatVReg, mode, 128)  		result = floatVReg  	default:  		panic("TODO") @@ -408,7 +412,8 @@ func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg r  func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction {  	store := m.allocateInstr() -	mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg} +	mode := m.amodePool.Allocate() +	*mode = addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg}  	var sizeInBits byte  	switch result.Type {  	case ssa.TypeI32, ssa.TypeF32: diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go index 8aabc5997..7121cb538 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -3,10 +3,12 @@ package arm64  import (  	"fmt"  	"math" +	"unsafe"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc"  	"github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" +	"github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi"  )  type ( @@ -22,9 +24,9 @@ type (  	// TODO: optimize the layout later once the impl settles.  	instruction struct {  		prev, next          *instruction -		u1, u2, u3          uint64 -		rd, rm, rn, ra      operand -		amode               addressMode +		u1, u2              uint64 +		rd                  regalloc.VReg +		rm, rn              operand  		kind                instructionKind  		addedBeforeRegAlloc bool  	} @@ -174,7 +176,7 @@ func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg {  	switch defKinds[i.kind] {  	case defKindNone:  	case defKindRD: -		*regs = append(*regs, i.rd.nr()) +		*regs = append(*regs, i.rd)  	case defKindCall:  		_, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2)  		for i := byte(0); i < retIntRealRegs; i++ { @@ -194,7 +196,7 @@ func (i *instruction) AssignDef(reg regalloc.VReg) {  	switch defKinds[i.kind] {  	case defKindNone:  	case defKindRD: -		i.rd = i.rd.assignReg(reg) +		i.rd = reg  	case defKindCall:  		panic("BUG: call instructions shouldn't be assigned")  	default: @@ -329,7 +331,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {  		if rm := i.rm.reg(); rm.Valid() {  			*regs = append(*regs, rm)  		} -		if ra := i.ra.reg(); ra.Valid() { +		if ra := regalloc.VReg(i.u2); ra.Valid() {  			*regs = append(*regs, ra)  		}  	case useKindRNRN1RM: @@ -341,18 +343,20 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {  			*regs = append(*regs, rm)  		}  	case useKindAMode: -		if amodeRN := i.amode.rn; amodeRN.Valid() { +		amode := i.getAmode() +		if amodeRN := amode.rn; amodeRN.Valid() {  			*regs = append(*regs, amodeRN)  		} -		if amodeRM := i.amode.rm; amodeRM.Valid() { +		if amodeRM := amode.rm; amodeRM.Valid() {  			*regs = append(*regs, amodeRM)  		}  	case useKindRNAMode:  		*regs = append(*regs, i.rn.reg()) -		if amodeRN := i.amode.rn; amodeRN.Valid() { +		amode := i.getAmode() +		if amodeRN := amode.rn; 
amodeRN.Valid() {  			*regs = append(*regs, amodeRN)  		} -		if amodeRM := i.amode.rm; amodeRM.Valid() { +		if amodeRM := amode.rm; amodeRM.Valid() {  			*regs = append(*regs, amodeRM)  		}  	case useKindCond: @@ -374,7 +378,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg {  	case useKindRDRewrite:  		*regs = append(*regs, i.rn.reg())  		*regs = append(*regs, i.rm.reg()) -		*regs = append(*regs, i.rd.reg()) +		*regs = append(*regs, i.rd)  	default:  		panic(fmt.Sprintf("useKind for %v not defined", i))  	} @@ -408,8 +412,8 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) {  				i.rm = i.rm.assignReg(reg)  			}  		} else { -			if rd := i.rd.reg(); rd.Valid() { -				i.rd = i.rd.assignReg(reg) +			if rd := i.rd; rd.Valid() { +				i.rd = reg  			}  		}  	case useKindRNRN1RM: @@ -435,32 +439,36 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) {  				i.rm = i.rm.assignReg(reg)  			}  		} else { -			if ra := i.ra.reg(); ra.Valid() { -				i.ra = i.ra.assignReg(reg) +			if ra := regalloc.VReg(i.u2); ra.Valid() { +				i.u2 = uint64(reg)  			}  		}  	case useKindAMode:  		if index == 0 { -			if amodeRN := i.amode.rn; amodeRN.Valid() { -				i.amode.rn = reg +			amode := i.getAmode() +			if amodeRN := amode.rn; amodeRN.Valid() { +				amode.rn = reg  			}  		} else { -			if amodeRM := i.amode.rm; amodeRM.Valid() { -				i.amode.rm = reg +			amode := i.getAmode() +			if amodeRM := amode.rm; amodeRM.Valid() { +				amode.rm = reg  			}  		}  	case useKindRNAMode:  		if index == 0 {  			i.rn = i.rn.assignReg(reg)  		} else if index == 1 { -			if amodeRN := i.amode.rn; amodeRN.Valid() { -				i.amode.rn = reg +			amode := i.getAmode() +			if amodeRN := amode.rn; amodeRN.Valid() { +				amode.rn = reg  			} else {  				panic("BUG")  			}  		} else { -			if amodeRM := i.amode.rm; amodeRM.Valid() { -				i.amode.rm = reg +			amode := i.getAmode() +			if amodeRM := amode.rm; amodeRM.Valid() { +				amode.rm = reg  			} else {  				panic("BUG")  			} @@ -503,35 +511,35 @@ func (i *instruction) callFuncRef() ssa.FuncRef {  }  // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) {  	i.kind = movZ -	i.rd = operandNR(dst) +	i.rd = dst  	i.u1 = imm -	i.u2 = shift +	i.u2 = uint64(shift)  	if dst64bit { -		i.u3 = 1 +		i.u2 |= 1 << 32  	}  }  // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) {  	i.kind = movK -	i.rd = operandNR(dst) +	i.rd = dst  	i.u1 = imm -	i.u2 = shift +	i.u2 = uint64(shift)  	if dst64bit { -		i.u3 = 1 +		i.u2 |= 1 << 32  	}  }  // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) {  	i.kind = movN -	i.rd = operandNR(dst) +	i.rd = dst  	i.u1 = imm -	i.u2 = shift +	i.u2 = uint64(shift)  	if dst64bit { -		i.u3 = 1 +		i.u2 |= 1 << 32  	}  } @@ -553,21 +561,21 @@ func (i *instruction) asRet() {  	i.kind = ret  } -func (i *instruction) asStorePair64(src1, 
src2 regalloc.VReg, amode addressMode) { +func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode *addressMode) {  	i.kind = storeP64  	i.rn = operandNR(src1)  	i.rm = operandNR(src2) -	i.amode = amode +	i.setAmode(amode)  } -func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) { +func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode *addressMode) {  	i.kind = loadP64  	i.rn = operandNR(src1)  	i.rm = operandNR(src2) -	i.amode = amode +	i.setAmode(amode)  } -func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asStore(src operand, amode *addressMode, sizeInBits byte) {  	switch sizeInBits {  	case 8:  		i.kind = store8 @@ -589,10 +597,10 @@ func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) {  		i.kind = fpuStore128  	}  	i.rn = src -	i.amode = amode +	i.setAmode(amode)  } -func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asSLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) {  	switch sizeInBits {  	case 8:  		i.kind = sLoad8 @@ -604,10 +612,10 @@ func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) {  		panic("BUG")  	}  	i.rd = dst -	i.amode = amode +	i.setAmode(amode)  } -func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asULoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) {  	switch sizeInBits {  	case 8:  		i.kind = uLoad8 @@ -619,10 +627,10 @@ func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) {  		i.kind = uLoad64  	}  	i.rd = dst -	i.amode = amode +	i.setAmode(amode)  } -func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asFpuLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) {  	switch sizeInBits {  	case 32:  		i.kind = fpuLoad32 @@ -632,10 +640,18 @@ func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte)  		i.kind = fpuLoad128  	}  	i.rd = dst -	i.amode = amode +	i.setAmode(amode)  } -func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { +func (i *instruction) getAmode() *addressMode { +	return wazevoapi.PtrFromUintptr[addressMode](uintptr(i.u1)) +} + +func (i *instruction) setAmode(a *addressMode) { +	i.u1 = uint64(uintptr(unsafe.Pointer(a))) +} + +func (i *instruction) asVecLoad1R(rd regalloc.VReg, rn operand, arr vecArrangement) {  	// NOTE: currently only has support for no-offset loads, though it is suspicious that  	// we would need to support offset load (that is only available for post-index).  	
i.kind = vecLoad1R @@ -646,32 +662,32 @@ func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) {  func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) {  	i.kind = cSet -	i.rd = operandNR(rd) +	i.rd = rd  	i.u1 = uint64(c)  	if mask {  		i.u2 = 1  	}  } -func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) { +func (i *instruction) asCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) {  	i.kind = cSel  	i.rd = rd  	i.rn = rn  	i.rm = rm  	i.u1 = uint64(c)  	if _64bit { -		i.u3 = 1 +		i.u2 = 1  	}  } -func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) { +func (i *instruction) asFpuCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) {  	i.kind = fpuCSel  	i.rd = rd  	i.rn = rn  	i.rm = rm  	i.u1 = uint64(c)  	if _64bit { -		i.u3 = 1 +		i.u2 = 1  	}  } @@ -691,7 +707,7 @@ func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targetIndex, tar  }  func (i *instruction) brTableSequenceOffsetsResolved() { -	i.u3 = 1 // indicate that the offsets are resolved, for debugging. +	i.rm.data = 1 // indicate that the offsets are resolved, for debugging.  }  func (i *instruction) brLabel() label { @@ -701,7 +717,7 @@ func (i *instruction) brLabel() label {  // brOffsetResolved is called when the target label is resolved.  func (i *instruction) brOffsetResolve(offset int64) {  	i.u2 = uint64(offset) -	i.u3 = 1 // indicate that the offset is resolved, for debugging. +	i.rm.data = 1 // indicate that the offset is resolved, for debugging.  }  func (i *instruction) brOffset() int64 { @@ -714,7 +730,7 @@ func (i *instruction) asCondBr(c cond, target label, is64bit bool) {  	i.u1 = c.asUint64()  	i.u2 = uint64(target)  	if is64bit { -		i.u3 = 1 +		i.u2 |= 1 << 32  	}  } @@ -728,17 +744,17 @@ func (i *instruction) condBrLabel() label {  // condBrOffsetResolve is called when the target label is resolved.  func (i *instruction) condBrOffsetResolve(offset int64) { -	i.rd.data = uint64(offset) -	i.rd.data2 = 1 // indicate that the offset is resolved, for debugging. +	i.rn.data = uint64(offset) +	i.rn.data2 = 1 // indicate that the offset is resolved, for debugging.  }  // condBrOffsetResolved returns true if condBrOffsetResolve is already called.  func (i *instruction) condBrOffsetResolved() bool { -	return i.rd.data2 == 1 +	return i.rn.data2 == 1  }  func (i *instruction) condBrOffset() int64 { -	return int64(i.rd.data) +	return int64(i.rn.data)  }  func (i *instruction) condBrCond() cond { @@ -746,33 +762,33 @@ func (i *instruction) condBrCond() cond {  }  func (i *instruction) condBr64bit() bool { -	return i.u3 == 1 +	return i.u2&(1<<32) != 0  }  func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) {  	i.kind = loadFpuConst32  	i.u1 = raw -	i.rd = operandNR(rd) +	i.rd = rd  }  func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) {  	i.kind = loadFpuConst64  	i.u1 = raw -	i.rd = operandNR(rd) +	i.rd = rd  }  func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) {  	i.kind = loadFpuConst128  	i.u1 = lo  	i.u2 = hi -	i.rd = operandNR(rd) +	i.rd = rd  }  func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) {  	i.kind = fpuCmp  	i.rn, i.rm = rn, rm  	if is64bit { -		i.u3 = 1 +		i.u1 = 1  	}  } @@ -783,12 +799,12 @@ func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, i  	i.u1 = uint64(c)  	i.u2 = uint64(flag)  	if is64bit { -		i.u3 = 1 +		i.u2 |= 1 << 32  	}  }  // asALU setups a basic ALU instruction. 
-func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asALU(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) {  	switch rm.kind {  	case operandKindNR:  		i.kind = aluRRR @@ -804,22 +820,22 @@ func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) {  	i.u1 = uint64(aluOp)  	i.rd, i.rn, i.rm = rd, rn, rm  	if dst64bit { -		i.u3 = 1 +		i.u2 |= 1 << 32  	}  }  // asALU setups a basic ALU instruction. -func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) { +func (i *instruction) asALURRRR(aluOp aluOp, rd regalloc.VReg, rn, rm operand, ra regalloc.VReg, dst64bit bool) {  	i.kind = aluRRRR  	i.u1 = uint64(aluOp) -	i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra +	i.rd, i.rn, i.rm, i.u2 = rd, rn, rm, uint64(ra)  	if dst64bit { -		i.u3 = 1 +		i.u1 |= 1 << 32  	}  }  // asALUShift setups a shift based ALU instruction. -func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asALUShift(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) {  	switch rm.kind {  	case operandKindNR:  		i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands. @@ -831,17 +847,17 @@ func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool)  	i.u1 = uint64(aluOp)  	i.rd, i.rn, i.rm = rd, rn, rm  	if dst64bit { -		i.u3 = 1 +		i.u2 |= 1 << 32  	}  }  func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) {  	i.kind = aluRRBitmaskImm  	i.u1 = uint64(aluOp) -	i.rn, i.rd = operandNR(rn), operandNR(rd) +	i.rn, i.rd = operandNR(rn), rd  	i.u2 = imm  	if dst64bit { -		i.u3 = 1 +		i.u1 |= 1 << 32  	}  } @@ -852,76 +868,76 @@ func (i *instruction) asMovToFPSR(rn regalloc.VReg) {  func (i *instruction) asMovFromFPSR(rd regalloc.VReg) {  	i.kind = movFromFPSR -	i.rd = operandNR(rd) +	i.rd = rd  }  func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) {  	i.kind = bitRR -	i.rn, i.rd = operandNR(rn), operandNR(rd) +	i.rn, i.rd = operandNR(rn), rd  	i.u1 = uint64(bitOp)  	if is64bit {  		i.u2 = 1  	}  } -func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asFpuRRR(op fpuBinOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) {  	i.kind = fpuRRR  	i.u1 = uint64(op)  	i.rd, i.rn, i.rm = rd, rn, rm  	if dst64bit { -		i.u3 = 1 +		i.u2 = 1  	}  } -func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) { +func (i *instruction) asFpuRR(op fpuUniOp, rd regalloc.VReg, rn operand, dst64bit bool) {  	i.kind = fpuRR  	i.u1 = uint64(op)  	i.rd, i.rn = rd, rn  	if dst64bit { -		i.u3 = 1 +		i.u2 = 1  	}  }  func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) {  	i.kind = extend -	i.rn, i.rd = operandNR(rn), operandNR(rd) +	i.rn, i.rd = operandNR(rn), rd  	i.u1 = uint64(fromBits)  	i.u2 = uint64(toBits)  	if signed { -		i.u3 = 1 +		i.u2 |= 1 << 32  	}  }  func (i *instruction) asMove32(rd, rn regalloc.VReg) {  	i.kind = mov32 -	i.rn, i.rd = operandNR(rn), operandNR(rd) +	i.rn, i.rd = operandNR(rn), rd  }  func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction {  	i.kind = mov64 -	i.rn, i.rd = operandNR(rn), operandNR(rd) +	i.rn, i.rd = operandNR(rn), rd  	return i  }  func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) {  	i.kind = fpuMov64 -	i.rn, i.rd = operandNR(rn), operandNR(rd) +	i.rn, 
i.rd = operandNR(rn), rd  }  func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction {  	i.kind = fpuMov128 -	i.rn, i.rd = operandNR(rn), operandNR(rd) +	i.rn, i.rd = operandNR(rn), rd  	return i  } -func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) { +func (i *instruction) asMovToVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) {  	i.kind = movToVec  	i.rd = rd  	i.rn = rn  	i.u1, i.u2 = uint64(arr), uint64(index)  } -func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) { +func (i *instruction) asMovFromVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex, signed bool) {  	if signed {  		i.kind = movFromVecSigned  	} else { @@ -932,48 +948,48 @@ func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vec  	i.u1, i.u2 = uint64(arr), uint64(index)  } -func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecDup(rd regalloc.VReg, rn operand, arr vecArrangement) {  	i.kind = vecDup  	i.u1 = uint64(arr)  	i.rn, i.rd = rn, rd  } -func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) { +func (i *instruction) asVecDupElement(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) {  	i.kind = vecDupElement  	i.u1 = uint64(arr)  	i.rn, i.rd = rn, rd  	i.u2 = uint64(index)  } -func (i *instruction) asVecExtract(rd, rn, rm operand, arr vecArrangement, index uint32) { +func (i *instruction) asVecExtract(rd regalloc.VReg, rn, rm operand, arr vecArrangement, index uint32) {  	i.kind = vecExtract  	i.u1 = uint64(arr)  	i.rn, i.rm, i.rd = rn, rm, rd  	i.u2 = uint64(index)  } -func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) { +func (i *instruction) asVecMovElement(rd regalloc.VReg, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) {  	i.kind = vecMovElement  	i.u1 = uint64(arr) -	i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex) +	i.u2 = uint64(rdIndex) | uint64(rnIndex)<<32  	i.rn, i.rd = rn, rd  } -func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecMisc(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) {  	i.kind = vecMisc  	i.u1 = uint64(op)  	i.rn, i.rd = rn, rd  	i.u2 = uint64(arr)  } -func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecLanes(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) {  	i.kind = vecLanes  	i.u1 = uint64(op)  	i.rn, i.rd = rn, rd  	i.u2 = uint64(arr)  } -func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { +func (i *instruction) asVecShiftImm(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction {  	i.kind = vecShiftImm  	i.u1 = uint64(op)  	i.rn, i.rm, i.rd = rn, rm, rd @@ -981,7 +997,7 @@ func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrange  	return i  } -func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecTbl(nregs byte, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {  	switch nregs {  	case 0, 1:  		i.kind = vecTbl @@ -1000,14 +1016,14 @@ func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangemen  	i.u2 = uint64(arr)  } -func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecPermute(op vecOp, rd 
regalloc.VReg, rn, rm operand, arr vecArrangement) {  	i.kind = vecPermute  	i.u1 = uint64(op)  	i.rn, i.rm, i.rd = rn, rm, rd  	i.u2 = uint64(arr)  } -func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction { +func (i *instruction) asVecRRR(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction {  	i.kind = vecRRR  	i.u1 = uint64(op)  	i.rn, i.rd, i.rm = rn, rd, rm @@ -1017,7 +1033,7 @@ func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement)  // asVecRRRRewrite encodes a vector instruction that rewrites the destination register.  // IMPORTANT: the destination register must be already defined before this instruction. -func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) { +func (i *instruction) asVecRRRRewrite(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {  	i.kind = vecRRRRewrite  	i.u1 = uint64(op)  	i.rn, i.rd, i.rm = rn, rd, rm @@ -1033,8 +1049,8 @@ func (i *instruction) IsCopy() bool {  // String implements fmt.Stringer.  func (i *instruction) String() (str string) { -	is64SizeBitToSize := func(u3 uint64) byte { -		if u3 == 0 { +	is64SizeBitToSize := func(v uint64) byte { +		if v == 0 {  			return 32  		}  		return 64 @@ -1049,46 +1065,46 @@ func (i *instruction) String() (str string) {  			str = "nop0"  		}  	case aluRRR: -		size := is64SizeBitToSize(i.u3) +		size := is64SizeBitToSize(i.u2 >> 32)  		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), -			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), +			formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size),  			i.rm.format(size))  	case aluRRRR: -		size := is64SizeBitToSize(i.u3) +		size := is64SizeBitToSize(i.u1 >> 32)  		str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(), -			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size)) +			formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(regalloc.VReg(i.u2), size))  	case aluRRImm12: -		size := is64SizeBitToSize(i.u3) +		size := is64SizeBitToSize(i.u2 >> 32)  		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), -			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size)) +			formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size))  	case aluRRBitmaskImm: -		size := is64SizeBitToSize(i.u3) -		rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size) +		size := is64SizeBitToSize(i.u1 >> 32) +		rd, rn := formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size)  		if size == 32 {  			str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2))  		} else {  			str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2)  		}  	case aluRRImmShift: -		size := is64SizeBitToSize(i.u3) +		size := is64SizeBitToSize(i.u2 >> 32)  		str = fmt.Sprintf("%s %s, %s, %#x",  			aluOp(i.u1).String(), -			formatVRegSized(i.rd.nr(), size), +			formatVRegSized(i.rd, size),  			formatVRegSized(i.rn.nr(), size),  			i.rm.shiftImm(),  		)  	case aluRRRShift: -		size := is64SizeBitToSize(i.u3) +		size := is64SizeBitToSize(i.u2 >> 32)  		str = fmt.Sprintf("%s %s, %s, %s",  			aluOp(i.u1).String(), -			formatVRegSized(i.rd.nr(), size), +			formatVRegSized(i.rd, size),  			formatVRegSized(i.rn.nr(), size),  			i.rm.format(size),  		)  	case aluRRRExtend: -		size := 
is64SizeBitToSize(i.u3) +		size := is64SizeBitToSize(i.u2 >> 32)  		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(), -			formatVRegSized(i.rd.nr(), size), +			formatVRegSized(i.rd, size),  			formatVRegSized(i.rn.nr(), size),  			// Regardless of the source size, the register is formatted in 32-bit.  			i.rm.format(32), @@ -1097,57 +1113,57 @@ func (i *instruction) String() (str string) {  		size := is64SizeBitToSize(i.u2)  		str = fmt.Sprintf("%s %s, %s",  			bitOp(i.u1), -			formatVRegSized(i.rd.nr(), size), +			formatVRegSized(i.rd, size),  			formatVRegSized(i.rn.nr(), size),  		)  	case uLoad8: -		str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) +		str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))  	case sLoad8: -		str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) +		str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))  	case uLoad16: -		str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) +		str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))  	case sLoad16: -		str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) +		str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))  	case uLoad32: -		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) +		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))  	case sLoad32: -		str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) +		str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))  	case uLoad64: -		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) +		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64))  	case store8: -		str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8)) +		str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(8))  	case store16: -		str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16)) +		str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(16))  	case store32: -		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32)) +		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(32))  	case store64: -		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) +		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64))  	case storeP64:  		str = fmt.Sprintf("stp %s, %s, %s", -			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) +			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64))  	case loadP64:  		str = fmt.Sprintf("ldp %s, %s, %s", -			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64)) +			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64))  	case mov64:  		str = fmt.Sprintf("mov %s, %s", -			formatVRegSized(i.rd.nr(), 64), +			formatVRegSized(i.rd, 64),  			formatVRegSized(i.rn.nr(), 64))  	case mov32: -		str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32)) +		str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd, 32), formatVRegSized(i.rn.nr(), 32)) 
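The recurring change in this hunk is that the instruction struct no longer carries a separate u3 field: the destination rd is now a bare regalloc.VReg rather than an operand, and the 64-bit/size flag is folded into spare bits of u1 or u2 (for most ALU kinds, bit 32 of u2), which is why the printer reads is64SizeBitToSize(i.u2 >> 32). Below is a minimal, self-contained sketch of that bit-packing idiom; pack64Flag and unpack64Flag are illustrative names for this note only, not helpers that exist in wazero.

package main

import "fmt"

// pack64Flag mirrors the `u2 |= 1 << 32` pattern used by the setters in this
// diff: the low 32 bits keep their payload and bit 32 records the 64-bit flag.
func pack64Flag(u uint64, is64 bool) uint64 {
	if is64 {
		u |= 1 << 32
	}
	return u
}

// unpack64Flag mirrors the `i.u2 >> 32` / `uint32(i.u2)` reads on the decode side.
func unpack64Flag(u uint64) (payload uint32, is64 bool) {
	return uint32(u), u>>32 != 0
}

func main() {
	u := pack64Flag(0x2a, true)
	payload, is64 := unpack64Flag(u)
	fmt.Printf("payload=%#x is64=%v\n", payload, is64) // payload=0x2a is64=true
}

The apparent motivation is shrinking the instruction struct by one word; the cost is that each instruction kind has to be explicit about which bits of u1/u2 it borrows, as the per-kind setters and String() cases above now are.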
 	case movZ: -		size := is64SizeBitToSize(i.u3) -		str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) +		size := is64SizeBitToSize(i.u2 >> 32) +		str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)  	case movN: -		size := is64SizeBitToSize(i.u3) -		str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) +		size := is64SizeBitToSize(i.u2 >> 32) +		str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)  	case movK: -		size := is64SizeBitToSize(i.u3) -		str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16) +		size := is64SizeBitToSize(i.u2 >> 32) +		str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)  	case extend:  		fromBits, toBits := byte(i.u1), byte(i.u2)  		var signedStr string -		if i.u3 == 1 { +		if i.u2>>32 == 1 {  			signedStr = "s"  		} else {  			signedStr = "u" @@ -1161,39 +1177,39 @@ func (i *instruction) String() (str string) {  		case 32:  			fromStr = "w"  		} -		str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32)) +		str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd, toBits), formatVRegSized(i.rn.nr(), 32))  	case cSel: -		size := is64SizeBitToSize(i.u3) +		size := is64SizeBitToSize(i.u2)  		str = fmt.Sprintf("csel %s, %s, %s, %s", -			formatVRegSized(i.rd.nr(), size), +			formatVRegSized(i.rd, size),  			formatVRegSized(i.rn.nr(), size),  			formatVRegSized(i.rm.nr(), size),  			condFlag(i.u1),  		)  	case cSet:  		if i.u2 != 0 { -			str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) +			str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1))  		} else { -			str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1)) +			str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1))  		}  	case cCmpImm: -		size := is64SizeBitToSize(i.u3) +		size := is64SizeBitToSize(i.u2 >> 32)  		str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s",  			formatVRegSized(i.rn.nr(), size), i.rm.data,  			i.u2&0b1111,  			condFlag(i.u1))  	case fpuMov64:  		str = fmt.Sprintf("mov %s, %s", -			formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone), +			formatVRegVec(i.rd, vecArrangement8B, vecIndexNone),  			formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone))  	case fpuMov128:  		str = fmt.Sprintf("mov %s, %s", -			formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone), +			formatVRegVec(i.rd, vecArrangement16B, vecIndexNone),  			formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone))  	case fpuMovFromVec:  		panic("TODO")  	case fpuRR: -		dstSz := is64SizeBitToSize(i.u3) +		dstSz := is64SizeBitToSize(i.u2)  		srcSz := dstSz  		op := fpuUniOp(i.u1)  		switch op { @@ -1203,38 +1219,38 @@ func (i *instruction) String() (str string) {  			srcSz = 64  		}  		str = fmt.Sprintf("%s %s, %s", op.String(), -			formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz)) +			formatVRegSized(i.rd, dstSz), formatVRegSized(i.rn.nr(), srcSz))  	case fpuRRR: -		size := is64SizeBitToSize(i.u3) +		size := is64SizeBitToSize(i.u2)  		str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(), -			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size)) +			formatVRegSized(i.rd, size), 
formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))  	case fpuRRI:  		panic("TODO")  	case fpuRRRR:  		panic("TODO")  	case fpuCmp: -		size := is64SizeBitToSize(i.u3) +		size := is64SizeBitToSize(i.u1)  		str = fmt.Sprintf("fcmp %s, %s",  			formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))  	case fpuLoad32: -		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32)) +		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))  	case fpuStore32: -		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64)) +		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(64))  	case fpuLoad64: -		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) +		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64))  	case fpuStore64: -		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) +		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64))  	case fpuLoad128: -		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64)) +		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 128), i.getAmode().format(64))  	case fpuStore128: -		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64)) +		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.getAmode().format(64))  	case loadFpuConst32: -		str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1))) +		str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd, 32), math.Float32frombits(uint32(i.u1)))  	case loadFpuConst64: -		str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1)) +		str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd, 64), math.Float64frombits(i.u1))  	case loadFpuConst128:  		str = fmt.Sprintf("ldr %s, #8; b 32; data.v128  %016x %016x", -			formatVRegSized(i.rd.nr(), 128), i.u1, i.u2) +			formatVRegSized(i.rd, 128), i.u1, i.u2)  	case fpuToInt:  		var op, src, dst string  		if signed := i.u1 == 1; signed { @@ -1242,15 +1258,15 @@ func (i *instruction) String() (str string) {  		} else {  			op = "fcvtzu"  		} -		if src64 := i.u2 == 1; src64 { +		if src64 := i.u2&1 != 0; src64 {  			src = formatVRegWidthVec(i.rn.nr(), vecArrangementD)  		} else {  			src = formatVRegWidthVec(i.rn.nr(), vecArrangementS)  		} -		if dst64 := i.u3 == 1; dst64 { -			dst = formatVRegSized(i.rd.nr(), 64) +		if dst64 := i.u2&2 != 0; dst64 { +			dst = formatVRegSized(i.rd, 64)  		} else { -			dst = formatVRegSized(i.rd.nr(), 32) +			dst = formatVRegSized(i.rd, 32)  		}  		str = fmt.Sprintf("%s %s, %s", op, dst, src) @@ -1261,21 +1277,21 @@ func (i *instruction) String() (str string) {  		} else {  			op = "ucvtf"  		} -		if src64 := i.u2 == 1; src64 { +		if src64 := i.u2&1 != 0; src64 {  			src = formatVRegSized(i.rn.nr(), 64)  		} else {  			src = formatVRegSized(i.rn.nr(), 32)  		} -		if dst64 := i.u3 == 1; dst64 { -			dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD) +		if dst64 := i.u2&2 != 0; dst64 { +			dst = formatVRegWidthVec(i.rd, vecArrangementD)  		} else { -			dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS) +			dst = formatVRegWidthVec(i.rd, vecArrangementS)  		}  		str = fmt.Sprintf("%s %s, %s", op, dst, src)  	case fpuCSel: -		size := is64SizeBitToSize(i.u3) +		size := 
is64SizeBitToSize(i.u2)  		str = fmt.Sprintf("fcsel %s, %s, %s, %s", -			formatVRegSized(i.rd.nr(), size), +			formatVRegSized(i.rd, size),  			formatVRegSized(i.rn.nr(), size),  			formatVRegSized(i.rm.nr(), size),  			condFlag(i.u1), @@ -1291,7 +1307,7 @@ func (i *instruction) String() (str string) {  		default:  			panic("unsupported arrangement " + arr.String())  		} -		str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) +		str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd, arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size))  	case movFromVec, movFromVecSigned:  		var size byte  		var opcode string @@ -1315,23 +1331,23 @@ func (i *instruction) String() (str string) {  		default:  			panic("unsupported arrangement " + arr.String())  		} -		str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2))) +		str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd, size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))  	case vecDup:  		str = fmt.Sprintf("dup %s, %s", -			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), +			formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone),  			formatVRegSized(i.rn.nr(), 64),  		)  	case vecDupElement:  		arr := vecArrangement(i.u1)  		str = fmt.Sprintf("dup %s, %s", -			formatVRegVec(i.rd.nr(), arr, vecIndexNone), +			formatVRegVec(i.rd, arr, vecIndexNone),  			formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)),  		)  	case vecDupFromFpu:  		panic("TODO")  	case vecExtract:  		str = fmt.Sprintf("ext %s, %s, %s, #%d", -			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), +			formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone),  			formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone),  			formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone),  			uint32(i.u2), @@ -1340,15 +1356,15 @@ func (i *instruction) String() (str string) {  		panic("TODO")  	case vecMovElement:  		str = fmt.Sprintf("mov %s, %s", -			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)), -			formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)), +			formatVRegVec(i.rd, vecArrangement(i.u1), vecIndex(i.u2&0xffffffff)), +			formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u2>>32)),  		)  	case vecMiscNarrow:  		panic("TODO")  	case vecRRR, vecRRRRewrite:  		str = fmt.Sprintf("%s %s, %s, %s",  			vecOp(i.u1), -			formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), +			formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),  			formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone),  			formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone),  		) @@ -1356,12 +1372,12 @@ func (i *instruction) String() (str string) {  		vop := vecOp(i.u1)  		if vop == vecOpCmeq0 {  			str = fmt.Sprintf("cmeq %s, %s, #0", -				formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), +				formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),  				formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))  		} else {  			str = fmt.Sprintf("%s %s, %s",  				vop, -				formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone), +				formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),  				formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))  		}  	case vecLanes: @@ -1379,24 +1395,24 @@ func (i *instruction) String() (str string) {  		}  		str = fmt.Sprintf("%s %s, %s",  			vecOp(i.u1), -			formatVRegWidthVec(i.rd.nr(), destArr), +			formatVRegWidthVec(i.rd, destArr),  			
formatVRegVec(i.rn.nr(), arr, vecIndexNone))  	case vecShiftImm:  		arr := vecArrangement(i.u2)  		str = fmt.Sprintf("%s %s, %s, #%d",  			vecOp(i.u1), -			formatVRegVec(i.rd.nr(), arr, vecIndexNone), +			formatVRegVec(i.rd, arr, vecIndexNone),  			formatVRegVec(i.rn.nr(), arr, vecIndexNone),  			i.rm.shiftImm())  	case vecTbl:  		arr := vecArrangement(i.u2)  		str = fmt.Sprintf("tbl %s, { %s }, %s", -			formatVRegVec(i.rd.nr(), arr, vecIndexNone), +			formatVRegVec(i.rd, arr, vecIndexNone),  			formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone),  			formatVRegVec(i.rm.nr(), arr, vecIndexNone))  	case vecTbl2:  		arr := vecArrangement(i.u2) -		rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr() +		rd, rn, rm := i.rd, i.rn.nr(), i.rm.nr()  		rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType())  		str = fmt.Sprintf("tbl %s, { %s, %s }, %s",  			formatVRegVec(rd, arr, vecIndexNone), @@ -1407,13 +1423,13 @@ func (i *instruction) String() (str string) {  		arr := vecArrangement(i.u2)  		str = fmt.Sprintf("%s %s, %s, %s",  			vecOp(i.u1), -			formatVRegVec(i.rd.nr(), arr, vecIndexNone), +			formatVRegVec(i.rd, arr, vecIndexNone),  			formatVRegVec(i.rn.nr(), arr, vecIndexNone),  			formatVRegVec(i.rm.nr(), arr, vecIndexNone))  	case movToFPSR:  		str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64))  	case movFromFPSR: -		str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64)) +		str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd, 64))  	case call:  		str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1))  	case callInd: @@ -1422,15 +1438,15 @@ func (i *instruction) String() (str string) {  		str = "ret"  	case br:  		target := label(i.u1) -		if i.u3 != 0 { +		if i.rm.data != 0 {  			str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String())  		} else {  			str = fmt.Sprintf("b %s", target.String())  		}  	case condBr: -		size := is64SizeBitToSize(i.u3) +		size := is64SizeBitToSize(i.u2 >> 32)  		c := cond(i.u1) -		target := label(i.u2) +		target := label(i.u2 & 0xffffffff)  		switch c.kind() {  		case condKindRegisterZero:  			if !i.condBrOffsetResolved() { @@ -1456,7 +1472,7 @@ func (i *instruction) String() (str string) {  			}  		}  	case adr: -		str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1)) +		str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd, 64), int64(i.u1))  	case brTableSequence:  		targetIndex := i.u1  		str = fmt.Sprintf("br_table_sequence %s, table_index=%d", formatVRegSized(i.rn.nr(), 64), targetIndex) @@ -1473,7 +1489,7 @@ func (i *instruction) String() (str string) {  		case 1:  			m = m + "b"  		} -		str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64)) +		str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64))  	case atomicCas:  		m := "casal"  		size := byte(32) @@ -1485,7 +1501,7 @@ func (i *instruction) String() (str string) {  		case 1:  			m = m + "b"  		} -		str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64)) +		str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))  	case atomicLoad:  		m := "ldar"  		size := byte(32) @@ -1497,7 +1513,7 @@ func (i *instruction) String() (str string) {  		case 1:  			m = m + "b"  		} -		str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd.nr(), size), 
formatVRegSized(i.rn.nr(), 64)) +		str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64))  	case atomicStore:  		m := "stlr"  		size := byte(32) @@ -1517,9 +1533,9 @@ func (i *instruction) String() (str string) {  	case emitSourceOffsetInfo:  		str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1))  	case vecLoad1R: -		str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64)) +		str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64))  	case loadConstBlockArg: -		str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd.nr(), 64), i.u1) +		str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd, 64), i.u1)  	default:  		panic(i.kind)  	} @@ -1528,26 +1544,26 @@ func (i *instruction) String() (str string) {  func (i *instruction) asAdr(rd regalloc.VReg, offset int64) {  	i.kind = adr -	i.rd = operandNR(rd) +	i.rd = rd  	i.u1 = uint64(offset)  } -func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt operand, size uint64) { +func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt regalloc.VReg, size uint64) {  	i.kind = atomicRmw -	i.rd, i.rn, i.rm = rt, rn, rs +	i.rd, i.rn, i.rm = rt, operandNR(rn), operandNR(rs)  	i.u1 = uint64(op)  	i.u2 = size  } -func (i *instruction) asAtomicCas(rn, rs, rt operand, size uint64) { +func (i *instruction) asAtomicCas(rn, rs, rt regalloc.VReg, size uint64) {  	i.kind = atomicCas -	i.rm, i.rn, i.rd = rt, rn, rs +	i.rm, i.rn, i.rd = operandNR(rt), operandNR(rn), rs  	i.u2 = size  } -func (i *instruction) asAtomicLoad(rn, rt operand, size uint64) { +func (i *instruction) asAtomicLoad(rn, rt regalloc.VReg, size uint64) {  	i.kind = atomicLoad -	i.rn, i.rd = rn, rt +	i.rn, i.rd = operandNR(rn), rt  	i.u2 = size  } @@ -1755,12 +1771,12 @@ func (i *instruction) asLoadConstBlockArg(v uint64, typ ssa.Type, dst regalloc.V  	i.kind = loadConstBlockArg  	i.u1 = v  	i.u2 = uint64(typ) -	i.rd = operandNR(dst) +	i.rd = dst  	return i  }  func (i *instruction) loadConstBlockArgData() (v uint64, typ ssa.Type, dst regalloc.VReg) { -	return i.u1, ssa.Type(i.u2), i.rd.nr() +	return i.u1, ssa.Type(i.u2), i.rd  }  func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction { @@ -1778,7 +1794,7 @@ func (i *instruction) asUDF() *instruction {  	return i  } -func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) { +func (i *instruction) asFpuToInt(rd regalloc.VReg, rn operand, rdSigned, src64bit, dst64bit bool) {  	i.kind = fpuToInt  	i.rn = rn  	i.rd = rd @@ -1789,11 +1805,11 @@ func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bo  		i.u2 = 1  	}  	if dst64bit { -		i.u3 = 1 +		i.u2 |= 2  	}  } -func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) { +func (i *instruction) asIntToFpu(rd regalloc.VReg, rn operand, rnSigned, src64bit, dst64bit bool) {  	i.kind = intToFpu  	i.rn = rn  	i.rd = rd @@ -1804,7 +1820,7 @@ func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bo  		i.u2 = 1  	}  	if dst64bit { -		i.u3 = 1 +		i.u2 |= 2  	}  } @@ -1817,7 +1833,7 @@ func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction {  // aluOp determines the type of ALU operation. Instructions whose kind is one of  // aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend  // would use this type. 
-type aluOp int +type aluOp uint32  func (a aluOp) String() string {  	switch a { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go index 227a96474..f0ede2d6a 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go @@ -44,12 +44,12 @@ func (i *instruction) encode(m *machine) {  	case callInd:  		c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true))  	case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128: -		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode)) +		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], *i.getAmode()))  	case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128: -		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode)) +		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.RealReg()], *i.getAmode()))  	case vecLoad1R:  		c.Emit4Bytes(encodeVecLoad1R( -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			vecArrangement(i.u1)))  	case condBr: @@ -75,22 +75,22 @@ func (i *instruction) encode(m *machine) {  			panic("BUG")  		}  	case movN: -		c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) +		c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))  	case movZ: -		c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) +		c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))  	case movK: -		c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3)) +		c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))  	case mov32: -		to, from := i.rd.realReg(), i.rn.realReg() +		to, from := i.rd.RealReg(), i.rn.realReg()  		c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to]))  	case mov64: -		to, from := i.rd.realReg(), i.rn.realReg() +		to, from := i.rd.RealReg(), i.rn.realReg()  		toIsSp := to == sp  		fromIsSp := from == sp  		c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp))  	case loadP64, storeP64:  		rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()] -		amode := i.amode +		amode := i.getAmode()  		rn := regNumberInEncoding[amode.rn.RealReg()]  		var pre bool  		switch amode.kind { @@ -102,21 +102,21 @@ func (i *instruction) encode(m *machine) {  		}  		c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm))  	case loadFpuConst32: -		rd := regNumberInEncoding[i.rd.realReg()] +		rd := regNumberInEncoding[i.rd.RealReg()]  		if i.u1 == 0 {  			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B))  		} else {  			encodeLoadFpuConst32(c, rd, i.u1)  		}  	case loadFpuConst64: -		rd := regNumberInEncoding[i.rd.realReg()] +		rd := regNumberInEncoding[i.rd.RealReg()]  		if i.u1 == 0 {  			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, 
vecArrangement8B))  		} else { -			encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.realReg()], i.u1) +			encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.RealReg()], i.u1)  		}  	case loadFpuConst128: -		rd := regNumberInEncoding[i.rd.realReg()] +		rd := regNumberInEncoding[i.rd.RealReg()]  		lo, hi := i.u1, i.u2  		if lo == 0 && hi == 0 {  			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B)) @@ -126,35 +126,35 @@ func (i *instruction) encode(m *machine) {  	case aluRRRR:  		c.Emit4Bytes(encodeAluRRRR(  			aluOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			regNumberInEncoding[i.rm.realReg()], -			regNumberInEncoding[i.ra.realReg()], -			uint32(i.u3), +			regNumberInEncoding[regalloc.VReg(i.u2).RealReg()], +			uint32(i.u1>>32),  		))  	case aluRRImmShift:  		c.Emit4Bytes(encodeAluRRImm(  			aluOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			uint32(i.rm.shiftImm()), -			uint32(i.u3), +			uint32(i.u2>>32),  		))  	case aluRRR:  		rn := i.rn.realReg()  		c.Emit4Bytes(encodeAluRRR(  			aluOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[rn],  			regNumberInEncoding[i.rm.realReg()], -			i.u3 == 1, +			i.u2>>32 == 1,  			rn == sp,  		))  	case aluRRRExtend:  		rm, exo, to := i.rm.er()  		c.Emit4Bytes(encodeAluRRRExtend(  			aluOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			regNumberInEncoding[rm.RealReg()],  			exo, @@ -164,25 +164,25 @@ func (i *instruction) encode(m *machine) {  		r, amt, sop := i.rm.sr()  		c.Emit4Bytes(encodeAluRRRShift(  			aluOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			regNumberInEncoding[r.RealReg()],  			uint32(amt),  			sop, -			i.u3 == 1, +			i.u2>>32 == 1,  		))  	case aluRRBitmaskImm:  		c.Emit4Bytes(encodeAluBitmaskImmediate(  			aluOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			i.u2, -			i.u3 == 1, +			i.u1>>32 == 1,  		))  	case bitRR:  		c.Emit4Bytes(encodeBitRR(  			bitOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			uint32(i.u2)),  		) @@ -190,22 +190,22 @@ func (i *instruction) encode(m *machine) {  		imm12, shift := i.rm.imm12()  		c.Emit4Bytes(encodeAluRRImm12(  			aluOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			imm12, shift, -			i.u3 == 1, +			i.u2>>32 == 1,  		))  	case fpuRRR:  		c.Emit4Bytes(encodeFpuRRR(  			fpuBinOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			regNumberInEncoding[i.rm.realReg()], -			i.u3 == 1, +			i.u2 == 1,  		))  	case fpuMov64, fpuMov128:  		// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register-- -		rd := regNumberInEncoding[i.rd.realReg()] +		rd := regNumberInEncoding[i.rd.RealReg()]  		rn := regNumberInEncoding[i.rn.realReg()]  		var q uint32  		if kind == fpuMov128 { @@ -213,7 +213,7 @@ func (i *instruction) encode(m *machine) {  		}  		
c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd)  	case cSet: -		rd := regNumberInEncoding[i.rd.realReg()] +		rd := regNumberInEncoding[i.rd.RealReg()]  		cf := condFlag(i.u1)  		if i.u2 == 1 {  			// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV- @@ -225,12 +225,12 @@ func (i *instruction) encode(m *machine) {  			c.Emit4Bytes(0b1001101010011111<<16 | uint32(cf.invert())<<12 | 0b111111<<5 | rd)  		}  	case extend: -		c.Emit4Bytes(encodeExtend(i.u3 == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.realReg()], regNumberInEncoding[i.rn.realReg()])) +		c.Emit4Bytes(encodeExtend((i.u2>>32) == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()]))  	case fpuCmp:  		// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en  		rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]  		var ftype uint32 -		if i.u3 == 1 { +		if i.u1 == 1 {  			ftype = 0b01 // double precision.  		}  		c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5) @@ -242,34 +242,34 @@ func (i *instruction) encode(m *machine) {  			c.Emit4Bytes(0)  		}  	case adr: -		c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1))) +		c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.RealReg()], uint32(i.u1)))  	case cSel:  		c.Emit4Bytes(encodeConditionalSelect(  			kind, -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			regNumberInEncoding[i.rm.realReg()],  			condFlag(i.u1), -			i.u3 == 1, +			i.u2 == 1,  		))  	case fpuCSel:  		c.Emit4Bytes(encodeFpuCSel( -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			regNumberInEncoding[i.rm.realReg()],  			condFlag(i.u1), -			i.u3 == 1, +			i.u2 == 1,  		))  	case movToVec:  		c.Emit4Bytes(encodeMoveToVec( -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			vecArrangement(byte(i.u1)),  			vecIndex(i.u2),  		))  	case movFromVec, movFromVecSigned:  		c.Emit4Bytes(encodeMoveFromVec( -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			vecArrangement(byte(i.u1)),  			vecIndex(i.u2), @@ -277,18 +277,18 @@ func (i *instruction) encode(m *machine) {  		))  	case vecDup:  		c.Emit4Bytes(encodeVecDup( -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			vecArrangement(byte(i.u1))))  	case vecDupElement:  		c.Emit4Bytes(encodeVecDupElement( -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			vecArrangement(byte(i.u1)),  			vecIndex(i.u2)))  	case vecExtract:  		c.Emit4Bytes(encodeVecExtract( -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			regNumberInEncoding[i.rm.realReg()],  			vecArrangement(byte(i.u1)), @@ -296,35 +296,35 @@ func (i *instruction) encode(m *machine) {  	case vecPermute:  		c.Emit4Bytes(encodeVecPermute(  			vecOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			regNumberInEncoding[i.rm.realReg()], 
 			vecArrangement(byte(i.u2))))  	case vecMovElement:  		c.Emit4Bytes(encodeVecMovElement( -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			vecArrangement(i.u1), -			uint32(i.u2), uint32(i.u3), +			uint32(i.u2), uint32(i.u2>>32),  		))  	case vecMisc:  		c.Emit4Bytes(encodeAdvancedSIMDTwoMisc(  			vecOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			vecArrangement(i.u2),  		))  	case vecLanes:  		c.Emit4Bytes(encodeVecLanes(  			vecOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			vecArrangement(i.u2),  		))  	case vecShiftImm:  		c.Emit4Bytes(encodeVecShiftImm(  			vecOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			uint32(i.rm.shiftImm()),  			vecArrangement(i.u2), @@ -332,7 +332,7 @@ func (i *instruction) encode(m *machine) {  	case vecTbl:  		c.Emit4Bytes(encodeVecTbl(  			1, -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			regNumberInEncoding[i.rm.realReg()],  			vecArrangement(i.u2)), @@ -340,7 +340,7 @@ func (i *instruction) encode(m *machine) {  	case vecTbl2:  		c.Emit4Bytes(encodeVecTbl(  			2, -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			regNumberInEncoding[i.rm.realReg()],  			vecArrangement(i.u2)), @@ -353,9 +353,9 @@ func (i *instruction) encode(m *machine) {  	case fpuRR:  		c.Emit4Bytes(encodeFloatDataOneSource(  			fpuUniOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()], -			i.u3 == 1, +			i.u2 == 1,  		))  	case vecRRR:  		if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal { @@ -365,14 +365,14 @@ func (i *instruction) encode(m *machine) {  	case vecRRRRewrite:  		c.Emit4Bytes(encodeVecRRR(  			vecOp(i.u1), -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			regNumberInEncoding[i.rm.realReg()],  			vecArrangement(i.u2),  		))  	case cCmpImm:  		// Conditional compare (immediate) in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en -		sf := uint32(i.u3 & 0b1) +		sf := uint32((i.u2 >> 32) & 0b1)  		nzcv := uint32(i.u2 & 0b1111)  		cond := uint32(condFlag(i.u1))  		imm := uint32(i.rm.data & 0b11111) @@ -381,7 +381,7 @@ func (i *instruction) encode(m *machine) {  			sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv,  		)  	case movFromFPSR: -		rt := regNumberInEncoding[i.rd.realReg()] +		rt := regNumberInEncoding[i.rd.RealReg()]  		c.Emit4Bytes(encodeSystemRegisterMove(rt, true))  	case movToFPSR:  		rt := regNumberInEncoding[i.rn.realReg()] @@ -390,13 +390,13 @@ func (i *instruction) encode(m *machine) {  		c.Emit4Bytes(encodeAtomicRmw(  			atomicRmwOp(i.u1),  			regNumberInEncoding[i.rm.realReg()], -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rn.realReg()],  			uint32(i.u2),  		))  	case atomicCas:  		c.Emit4Bytes(encodeAtomicCas( -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			regNumberInEncoding[i.rm.realReg()],  			
regNumberInEncoding[i.rn.realReg()],  			uint32(i.u2), @@ -404,7 +404,7 @@ func (i *instruction) encode(m *machine) {  	case atomicLoad:  		c.Emit4Bytes(encodeAtomicLoadStore(  			regNumberInEncoding[i.rn.realReg()], -			regNumberInEncoding[i.rd.realReg()], +			regNumberInEncoding[i.rd.RealReg()],  			uint32(i.u2),  			1,  		)) @@ -810,7 +810,7 @@ func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32  // encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in  // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en  func encodeCnvBetweenFloatInt(i *instruction) uint32 { -	rd := regNumberInEncoding[i.rd.realReg()] +	rd := regNumberInEncoding[i.rd.RealReg()]  	rn := regNumberInEncoding[i.rn.realReg()]  	var opcode uint32 @@ -822,8 +822,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 {  		rmode = 0b00  		signed := i.u1 == 1 -		src64bit := i.u2 == 1 -		dst64bit := i.u3 == 1 +		src64bit := i.u2&1 != 0 +		dst64bit := i.u2&2 != 0  		if signed {  			opcode = 0b010  		} else { @@ -841,8 +841,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 {  		rmode = 0b11  		signed := i.u1 == 1 -		src64bit := i.u2 == 1 -		dst64bit := i.u3 == 1 +		src64bit := i.u2&1 != 0 +		dst64bit := i.u2&2 != 0  		if signed {  			opcode = 0b000 @@ -1787,13 +1787,13 @@ func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) {  // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en  //  // "shift" must have been divided by 16 at this point. -func encodeMoveWideImmediate(opc uint32, rd uint32, imm, shift, _64bit uint64) (ret uint32) { +func encodeMoveWideImmediate(opc uint32, rd uint32, imm uint64, shift, _64bit uint32) (ret uint32) {  	ret = rd  	ret |= uint32(imm&0xffff) << 5 -	ret |= (uint32(shift)) << 21 +	ret |= (shift) << 21  	ret |= 0b100101 << 23  	ret |= opc << 29 -	ret |= uint32(_64bit) << 31 +	ret |= _64bit << 31  	return  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go index 698b382d4..6c6824fb0 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go @@ -284,18 +284,18 @@ func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {  func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {  	instr := m.allocateInstr() -	instr.asMOVZ(dst, v, uint64(shift), dst64) +	instr.asMOVZ(dst, v, uint32(shift), dst64)  	m.insert(instr)  }  func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {  	instr := m.allocateInstr() -	instr.asMOVK(dst, v, uint64(shift), dst64) +	instr.asMOVK(dst, v, uint32(shift), dst64)  	m.insert(instr)  }  func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {  	instr := m.allocateInstr() -	instr.asMOVN(dst, v, uint64(shift), dst64) +	instr.asMOVN(dst, v, uint32(shift), dst64)  	m.insert(instr)  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go index 2bb234e8c..048bf3204 100644 --- 
a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go @@ -52,11 +52,11 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) {  	maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32)  	m.lowerConstantI32(maxIndexReg, int32(len(targets)-1))  	subs := m.allocateInstr() -	subs.asALU(aluOpSubS, operandNR(xzrVReg), indexOperand, operandNR(maxIndexReg), false) +	subs.asALU(aluOpSubS, xzrVReg, indexOperand, operandNR(maxIndexReg), false)  	m.insert(subs)  	csel := m.allocateInstr()  	adjustedIndex := m.compiler.AllocateVReg(ssa.TypeI32) -	csel.asCSel(operandNR(adjustedIndex), operandNR(maxIndexReg), indexOperand, hs, false) +	csel.asCSel(adjustedIndex, operandNR(maxIndexReg), indexOperand, hs, false)  	m.insert(csel)  	brSequence := m.allocateInstr() @@ -249,7 +249,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  			rc := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone)  			rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  			rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -			rd := operandNR(m.compiler.VRegOf(instr.Return())) +			rd := m.compiler.VRegOf(instr.Return())  			m.lowerSelectVec(rc, rn, rm, rd)  		} else {  			m.lowerSelect(c, x, y, instr.Return()) @@ -270,7 +270,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		x, ctx := instr.Arg2()  		result := instr.Return()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(result)) +		rd := m.compiler.VRegOf(result)  		ctxVReg := m.compiler.VRegOf(ctx)  		m.lowerFpuToInt(rd, rn, ctxVReg, true, x.Type() == ssa.TypeF64,  			result.Type().Bits() == 64, op == ssa.OpcodeFcvtToSintSat) @@ -278,7 +278,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		x, ctx := instr.Arg2()  		result := instr.Return()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(result)) +		rd := m.compiler.VRegOf(result)  		ctxVReg := m.compiler.VRegOf(ctx)  		m.lowerFpuToInt(rd, rn, ctxVReg, false, x.Type() == ssa.TypeF64,  			result.Type().Bits() == 64, op == ssa.OpcodeFcvtToUintSat) @@ -286,25 +286,25 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		x := instr.Arg()  		result := instr.Return()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(result)) +		rd := m.compiler.VRegOf(result)  		m.lowerIntToFpu(rd, rn, true, x.Type() == ssa.TypeI64, result.Type().Bits() == 64)  	case ssa.OpcodeFcvtFromUint:  		x := instr.Arg()  		result := instr.Return()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(result)) +		rd := m.compiler.VRegOf(result)  		m.lowerIntToFpu(rd, rn, false, x.Type() == ssa.TypeI64, result.Type().Bits() == 64)  	case ssa.OpcodeFdemote:  		v := instr.Arg()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		cnt := m.allocateInstr()  		cnt.asFpuRR(fpuUniOpCvt64To32, rd, rn, false)  		m.insert(cnt)  	case ssa.OpcodeFpromote:  		v := instr.Arg()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		cnt := m.allocateInstr()  		cnt.asFpuRR(fpuUniOpCvt32To64, rd, rn, true)  		m.insert(cnt) @@ 
-343,15 +343,15 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		ctxVReg := m.compiler.VRegOf(ctx)  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		m.lowerIDiv(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSdiv)  	case ssa.OpcodeSrem, ssa.OpcodeUrem:  		x, y, ctx := instr.Arg3()  		ctxVReg := m.compiler.VRegOf(ctx)  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) -		m.lowerIRem(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem) +		rd := m.compiler.VRegOf(instr.Return()) +		m.lowerIRem(ctxVReg, rd, rn.nr(), rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem)  	case ssa.OpcodeVconst:  		result := m.compiler.VRegOf(instr.Return())  		lo, hi := instr.VconstData() @@ -362,7 +362,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		x := instr.Arg()  		ins := m.allocateInstr()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		ins.asVecMisc(vecOpNot, rd, rn, vecArrangement16B)  		m.insert(ins)  	case ssa.OpcodeVbxor: @@ -382,12 +382,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)  		creg := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone) -		tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) +		tmp := m.compiler.AllocateVReg(ssa.TypeV128)  		// creg is overwritten by BSL, so we need to move it to the result register before the instruction  		// in case when it is used somewhere else.  		
mov := m.allocateInstr() -		mov.asFpuMov128(tmp.nr(), creg.nr()) +		mov.asFpuMov128(tmp, creg.nr())  		m.insert(mov)  		ins := m.allocateInstr() @@ -396,7 +396,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		mov2 := m.allocateInstr()  		rd := m.compiler.VRegOf(instr.Return()) -		mov2.asFpuMov128(rd, tmp.nr()) +		mov2.asFpuMov128(rd, tmp)  		m.insert(mov2)  	case ssa.OpcodeVanyTrue, ssa.OpcodeVallTrue:  		x, lane := instr.ArgWithLane() @@ -405,12 +405,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  			arr = ssaLaneToArrangement(lane)  		}  		rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		m.lowerVcheckTrue(op, rm, rd, arr)  	case ssa.OpcodeVhighBits:  		x, lane := instr.ArgWithLane()  		rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		arr := ssaLaneToArrangement(lane)  		m.lowerVhighBits(rm, rd, arr)  	case ssa.OpcodeVIadd: @@ -441,9 +441,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  			panic("unsupported lane " + lane.String())  		} -		widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo, vv, operandShiftImm(0), loArr) -		widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi, vv, operandShiftImm(0), hiArr) -		addp := m.allocateInstr().asVecRRR(vecOpAddp, operandNR(m.compiler.VRegOf(instr.Return())), tmpLo, tmpHi, dstArr) +		widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo.nr(), vv, operandShiftImm(0), loArr) +		widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi.nr(), vv, operandShiftImm(0), hiArr) +		addp := m.allocateInstr().asVecRRR(vecOpAddp, m.compiler.VRegOf(instr.Return()), tmpLo, tmpHi, dstArr)  		m.insert(widenLo)  		m.insert(widenHi)  		m.insert(addp) @@ -493,7 +493,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		arr := ssaLaneToArrangement(lane)  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		m.lowerVIMul(rd, rn, rm, arr)  	case ssa.OpcodeVIabs:  		m.lowerVecMisc(vecOpAbs, instr) @@ -507,7 +507,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		arr := ssaLaneToArrangement(lane)  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		m.lowerVShift(op, rd, rn, rm, arr)  	case ssa.OpcodeVSqrt:  		m.lowerVecMisc(vecOpFsqrt, instr) @@ -547,18 +547,18 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		x, lane := instr.ArgWithLane()  		arr := ssaLaneToArrangement(lane)  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		m.lowerVfpuToInt(rd, rn, arr, op == ssa.OpcodeVFcvtToSintSat)  	case ssa.OpcodeVFcvtFromSint, ssa.OpcodeVFcvtFromUint:  		x, lane := instr.ArgWithLane()  		arr := ssaLaneToArrangement(lane)  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		m.lowerVfpuFromInt(rd, rn, arr, op == ssa.OpcodeVFcvtFromSint)  	case ssa.OpcodeSwidenLow, 
ssa.OpcodeUwidenLow:  		x, lane := instr.ArgWithLane()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		var arr vecArrangement  		switch lane { @@ -580,7 +580,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  	case ssa.OpcodeSwidenHigh, ssa.OpcodeUwidenHigh:  		x, lane := instr.ArgWithLane()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		arr := ssaLaneToArrangement(lane) @@ -607,9 +607,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		}  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return()) -		tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) +		tmp := m.compiler.AllocateVReg(ssa.TypeV128)  		loQxtn := m.allocateInstr()  		hiQxtn := m.allocateInstr() @@ -628,7 +628,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		m.insert(hiQxtn)  		mov := m.allocateInstr() -		mov.asFpuMov128(rd.nr(), tmp.nr()) +		mov.asFpuMov128(rd, tmp)  		m.insert(mov)  	case ssa.OpcodeFvpromoteLow:  		x, lane := instr.ArgWithLane() @@ -637,7 +637,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		}  		ins := m.allocateInstr()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		ins.asVecMisc(vecOpFcvtl, rd, rn, vecArrangement2S)  		m.insert(ins)  	case ssa.OpcodeFvdemote: @@ -647,14 +647,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		}  		ins := m.allocateInstr()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		ins.asVecMisc(vecOpFcvtn, rd, rn, vecArrangement2S)  		m.insert(ins)  	case ssa.OpcodeExtractlane:  		x, index, signed, lane := instr.ExtractlaneData()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		mov := m.allocateInstr()  		switch lane { @@ -680,12 +680,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		x, y, index, lane := instr.InsertlaneData()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) -		tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) +		rd := m.compiler.VRegOf(instr.Return()) +		tmpReg := m.compiler.AllocateVReg(ssa.TypeV128)  		// Initially mov rn to tmp.  		mov1 := m.allocateInstr() -		mov1.asFpuMov128(tmpReg.nr(), rn.nr()) +		mov1.asFpuMov128(tmpReg, rn.nr())  		m.insert(mov1)  		// movToVec and vecMovElement do not clear the remaining bits to zero, @@ -709,14 +709,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		// Finally mov tmp to rd.  		
mov3 := m.allocateInstr() -		mov3.asFpuMov128(rd.nr(), tmpReg.nr()) +		mov3.asFpuMov128(rd, tmpReg)  		m.insert(mov3)  	case ssa.OpcodeSwizzle:  		x, y, lane := instr.Arg2WithLane()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		arr := ssaLaneToArrangement(lane) @@ -729,14 +729,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		x, y, lane1, lane2 := instr.ShuffleData()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		m.lowerShuffle(rd, rn, rm, lane1, lane2)  	case ssa.OpcodeSplat:  		x, lane := instr.ArgWithLane()  		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) +		rd := m.compiler.VRegOf(instr.Return())  		dup := m.allocateInstr()  		switch lane { @@ -760,12 +760,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  		xx, yy := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone),  			m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)  		tmp, tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)), operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) -		m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp, xx, yy, vecArrangement8H)) -		m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2, xx, yy, vecArrangement8H)) -		m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp, tmp, tmp2, vecArrangement4S)) +		m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp.nr(), xx, yy, vecArrangement8H)) +		m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2.nr(), xx, yy, vecArrangement8H)) +		m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp2, vecArrangement4S)) -		rd := operandNR(m.compiler.VRegOf(instr.Return())) -		m.insert(m.allocateInstr().asFpuMov128(rd.nr(), tmp.nr())) +		rd := m.compiler.VRegOf(instr.Return()) +		m.insert(m.allocateInstr().asFpuMov128(rd, tmp.nr()))  	case ssa.OpcodeLoadSplat:  		ptr, offset, lane := instr.LoadSplatData() @@ -794,7 +794,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {  	m.executableContext.FlushPendingInstructions()  } -func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) { +func (m *machine) lowerShuffle(rd regalloc.VReg, rn, rm operand, lane1, lane2 uint64) {  	// `tbl2` requires 2 consecutive registers, so we arbitrarily pick v29, v30.  	vReg, wReg := v29VReg, v30VReg @@ -822,7 +822,7 @@ func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) {  	m.insert(tbl2)  } -func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangement) { +func (m *machine) lowerVShift(op ssa.Opcode, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {  	var modulo byte  	switch arr {  	case vecArrangement16B: @@ -847,13 +847,13 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem  	if op != ssa.OpcodeVIshl {  		// Negate the amount to make this as right shift.  		neg := m.allocateInstr() -		neg.asALU(aluOpSub, rtmp, operandNR(xzrVReg), rtmp, true) +		neg.asALU(aluOpSub, rtmp.nr(), operandNR(xzrVReg), rtmp, true)  		m.insert(neg)  	}  	// Copy the shift amount into a vector register as sshl/ushl requires it to be there.  	
dup := m.allocateInstr() -	dup.asVecDup(vtmp, rtmp, arr) +	dup.asVecDup(vtmp.nr(), rtmp, arr)  	m.insert(dup)  	if op == ssa.OpcodeVIshl || op == ssa.OpcodeVSshr { @@ -867,7 +867,7 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem  	}  } -func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangement) { +func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm operand, rd regalloc.VReg, arr vecArrangement) {  	tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))  	// Special case VallTrue for i64x2. @@ -878,11 +878,11 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem  		//	cset dst, eq  		ins := m.allocateInstr() -		ins.asVecMisc(vecOpCmeq0, tmp, rm, vecArrangement2D) +		ins.asVecMisc(vecOpCmeq0, tmp.nr(), rm, vecArrangement2D)  		m.insert(ins)  		addp := m.allocateInstr() -		addp.asVecRRR(vecOpAddp, tmp, tmp, tmp, vecArrangement2D) +		addp.asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp, vecArrangement2D)  		m.insert(addp)  		fcmp := m.allocateInstr() @@ -890,7 +890,7 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem  		m.insert(fcmp)  		cset := m.allocateInstr() -		cset.asCSet(rd.nr(), false, eq) +		cset.asCSet(rd, false, eq)  		m.insert(cset)  		return @@ -900,10 +900,10 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem  	ins := m.allocateInstr()  	if op == ssa.OpcodeVanyTrue {  		// 	umaxp v4?.16b, v2?.16b, v2?.16b -		ins.asVecRRR(vecOpUmaxp, tmp, rm, rm, vecArrangement16B) +		ins.asVecRRR(vecOpUmaxp, tmp.nr(), rm, rm, vecArrangement16B)  	} else {  		// 	uminv d4?, v2?.4s -		ins.asVecLanes(vecOpUminv, tmp, rm, arr) +		ins.asVecLanes(vecOpUminv, tmp.nr(), rm, arr)  	}  	m.insert(ins) @@ -917,15 +917,15 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem  	m.insert(movv)  	fc := m.allocateInstr() -	fc.asCCmpImm(rd, uint64(0), al, 0, true) +	fc.asCCmpImm(operandNR(rd), uint64(0), al, 0, true)  	m.insert(fc)  	cset := m.allocateInstr() -	cset.asCSet(rd.nr(), false, ne) +	cset.asCSet(rd, false, ne)  	m.insert(cset)  } -func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) { +func (m *machine) lowerVhighBits(rm operand, rd regalloc.VReg, arr vecArrangement) {  	r0 := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))  	v0 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))  	v1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) @@ -947,7 +947,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {  		// Right arithmetic shift on the original vector and store the result into v1. So we have:  		// v1[i] = 0xff if vi<0, 0 otherwise.  		sshr := m.allocateInstr() -		sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(7), vecArrangement16B) +		sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(7), vecArrangement16B)  		m.insert(sshr)  		// Load the bit mask into r0. @@ -958,7 +958,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {  		// dup r0 to v0.  		dup := m.allocateInstr() -		dup.asVecDup(v0, r0, vecArrangement2D) +		dup.asVecDup(v0.nr(), r0, vecArrangement2D)  		m.insert(dup)  		// Lane-wise logical AND with the bit mask, meaning that we have @@ -967,23 +967,23 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {  		// Below, we use the following notation:  		// wi := (1 << i) if vi<0, 0 otherwise.  		
and := m.allocateInstr() -		and.asVecRRR(vecOpAnd, v1, v1, v0, vecArrangement16B) +		and.asVecRRR(vecOpAnd, v1.nr(), v1, v0, vecArrangement16B)  		m.insert(and)  		// Swap the lower and higher 8 byte elements, and write it into v0, meaning that we have  		// v0[i] = w(i+8) if i < 8, w(i-8) otherwise.  		ext := m.allocateInstr() -		ext.asVecExtract(v0, v1, v1, vecArrangement16B, uint32(8)) +		ext.asVecExtract(v0.nr(), v1, v1, vecArrangement16B, uint32(8))  		m.insert(ext)  		// v = [w0, w8, ..., w7, w15]  		zip1 := m.allocateInstr() -		zip1.asVecPermute(vecOpZip1, v0, v1, v0, vecArrangement16B) +		zip1.asVecPermute(vecOpZip1, v0.nr(), v1, v0, vecArrangement16B)  		m.insert(zip1)  		// v.h[0] = w0 + ... + w15  		addv := m.allocateInstr() -		addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H) +		addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H)  		m.insert(addv)  		// Extract the v.h[0] as the result. @@ -1006,7 +1006,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {  		// Right arithmetic shift on the original vector and store the result into v1. So we have:  		// v[i] = 0xffff if vi<0, 0 otherwise.  		sshr := m.allocateInstr() -		sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(15), vecArrangement8H) +		sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(15), vecArrangement8H)  		m.insert(sshr)  		// Load the bit mask into r0. @@ -1014,26 +1014,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {  		// dup r0 to vector v0.  		dup := m.allocateInstr() -		dup.asVecDup(v0, r0, vecArrangement2D) +		dup.asVecDup(v0.nr(), r0, vecArrangement2D)  		m.insert(dup)  		lsl := m.allocateInstr() -		lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(4), true) +		lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(4), true)  		m.insert(lsl)  		movv := m.allocateInstr() -		movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1)) +		movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1))  		m.insert(movv)  		// Lane-wise logical AND with the bitmask, meaning that we have  		// v[i] = (1 << i)     if vi<0, 0 otherwise for i=0..3  		//      = (1 << (i+4)) if vi<0, 0 otherwise for i=3..7  		and := m.allocateInstr() -		and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B) +		and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B)  		m.insert(and)  		addv := m.allocateInstr() -		addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H) +		addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H)  		m.insert(addv)  		movfv := m.allocateInstr() @@ -1055,7 +1055,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {  		// Right arithmetic shift on the original vector and store the result into v1. So we have:  		// v[i] = 0xffffffff if vi<0, 0 otherwise.  		sshr := m.allocateInstr() -		sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(31), vecArrangement4S) +		sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(31), vecArrangement4S)  		m.insert(sshr)  		// Load the bit mask into r0. @@ -1063,26 +1063,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {  		// dup r0 to vector v0.  		
dup := m.allocateInstr() -		dup.asVecDup(v0, r0, vecArrangement2D) +		dup.asVecDup(v0.nr(), r0, vecArrangement2D)  		m.insert(dup)  		lsl := m.allocateInstr() -		lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(2), true) +		lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(2), true)  		m.insert(lsl)  		movv := m.allocateInstr() -		movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1)) +		movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1))  		m.insert(movv)  		// Lane-wise logical AND with the bitmask, meaning that we have  		// v[i] = (1 << i)     if vi<0, 0 otherwise for i in [0, 1]  		//      = (1 << (i+4)) if vi<0, 0 otherwise for i in [2, 3]  		and := m.allocateInstr() -		and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B) +		and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B)  		m.insert(and)  		addv := m.allocateInstr() -		addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement4S) +		addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement4S)  		m.insert(addv)  		movfv := m.allocateInstr() @@ -1102,21 +1102,21 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {  		// Move the higher 64-bit int into r0.  		movv1 := m.allocateInstr() -		movv1.asMovFromVec(r0, rm, vecArrangementD, vecIndex(1), false) +		movv1.asMovFromVec(r0.nr(), rm, vecArrangementD, vecIndex(1), false)  		m.insert(movv1)  		// Move the sign bit into the least significant bit.  		lsr1 := m.allocateInstr() -		lsr1.asALUShift(aluOpLsr, r0, r0, operandShiftImm(63), true) +		lsr1.asALUShift(aluOpLsr, r0.nr(), r0, operandShiftImm(63), true)  		m.insert(lsr1)  		lsr2 := m.allocateInstr() -		lsr2.asALUShift(aluOpLsr, rd, rd, operandShiftImm(63), true) +		lsr2.asALUShift(aluOpLsr, rd, operandNR(rd), operandShiftImm(63), true)  		m.insert(lsr2)  		// rd = (r0<<1) | rd  		lsl := m.allocateInstr() -		lsl.asALU(aluOpAdd, rd, rd, operandSR(r0.nr(), 1, shiftOpLSL), false) +		lsl.asALU(aluOpAdd, rd, operandNR(rd), operandSR(r0.nr(), 1, shiftOpLSL), false)  		m.insert(lsl)  	default:  		panic("Unsupported " + arr.String()) @@ -1128,7 +1128,7 @@ func (m *machine) lowerVecMisc(op vecOp, instr *ssa.Instruction) {  	arr := ssaLaneToArrangement(lane)  	ins := m.allocateInstr()  	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone) -	rd := operandNR(m.compiler.VRegOf(instr.Return())) +	rd := m.compiler.VRegOf(instr.Return())  	ins.asVecMisc(op, rd, rn, arr)  	m.insert(ins)  } @@ -1137,22 +1137,22 @@ func (m *machine) lowerVecRRR(op vecOp, x, y, ret ssa.Value, arr vecArrangement)  	ins := m.allocateInstr()  	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -	rd := operandNR(m.compiler.VRegOf(ret)) +	rd := m.compiler.VRegOf(ret)  	ins.asVecRRR(op, rd, rn, rm, arr)  	m.insert(ins)  } -func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) { +func (m *machine) lowerVIMul(rd regalloc.VReg, rn, rm operand, arr vecArrangement) {  	if arr != vecArrangement2D {  		mul := m.allocateInstr()  		mul.asVecRRR(vecOpMul, rd, rn, rm, arr)  		m.insert(mul)  	} else { -		tmp1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) -		tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) -		tmp3 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) +		tmp1 := m.compiler.AllocateVReg(ssa.TypeV128) +		tmp2 := m.compiler.AllocateVReg(ssa.TypeV128) +		tmp3 := m.compiler.AllocateVReg(ssa.TypeV128) -		tmpRes := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) +		tmpRes := m.compiler.AllocateVReg(ssa.TypeV128)  		// Following 
the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696  		rev64 := m.allocateInstr() @@ -1160,7 +1160,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {  		m.insert(rev64)  		mul := m.allocateInstr() -		mul.asVecRRR(vecOpMul, tmp2, tmp2, rn, vecArrangement4S) +		mul.asVecRRR(vecOpMul, tmp2, operandNR(tmp2), rn, vecArrangement4S)  		m.insert(mul)  		xtn1 := m.allocateInstr() @@ -1168,7 +1168,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {  		m.insert(xtn1)  		addp := m.allocateInstr() -		addp.asVecRRR(vecOpAddp, tmp2, tmp2, tmp2, vecArrangement4S) +		addp.asVecRRR(vecOpAddp, tmp2, operandNR(tmp2), operandNR(tmp2), vecArrangement4S)  		m.insert(addp)  		xtn2 := m.allocateInstr() @@ -1179,15 +1179,15 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {  		// In short, in UMLAL instruction, the result register is also one of the source register, and  		// the value on the result register is significant.  		shll := m.allocateInstr() -		shll.asVecMisc(vecOpShll, tmpRes, tmp2, vecArrangement2S) +		shll.asVecMisc(vecOpShll, tmpRes, operandNR(tmp2), vecArrangement2S)  		m.insert(shll)  		umlal := m.allocateInstr() -		umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, tmp3, tmp1, vecArrangement2S) +		umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, operandNR(tmp3), operandNR(tmp1), vecArrangement2S)  		m.insert(umlal)  		mov := m.allocateInstr() -		mov.asFpuMov128(rd.nr(), tmpRes.nr()) +		mov.asFpuMov128(rd, tmpRes)  		m.insert(mov)  	}  } @@ -1203,7 +1203,7 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) {  	// BSL modifies the destination register, so we need to use a temporary register so that  	// the actual definition of the destination register happens *after* the BSL instruction.  	// That way, we can force the spill instruction to be inserted after the BSL instruction. -	tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) +	tmp := m.compiler.AllocateVReg(ssa.TypeV128)  	fcmgt := m.allocateInstr()  	if max { @@ -1220,17 +1220,17 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) {  	res := operandNR(m.compiler.VRegOf(instr.Return()))  	mov2 := m.allocateInstr() -	mov2.asFpuMov128(res.nr(), tmp.nr()) +	mov2.asFpuMov128(res.nr(), tmp)  	m.insert(mov2)  } -func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) { +func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn regalloc.VReg, rm operand, _64bit, signed bool) {  	div := m.allocateInstr()  	if signed { -		div.asALU(aluOpSDiv, rd, rn, rm, _64bit) +		div.asALU(aluOpSDiv, rd, operandNR(rn), rm, _64bit)  	} else { -		div.asALU(aluOpUDiv, rd, rn, rm, _64bit) +		div.asALU(aluOpUDiv, rd, operandNR(rn), rm, _64bit)  	}  	m.insert(div) @@ -1239,11 +1239,11 @@ func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi  	// rd = rn-rd*rm by MSUB instruction.  	
msub := m.allocateInstr() -	msub.asALURRRR(aluOpMSub, rd, rd, rm, rn, _64bit) +	msub.asALURRRR(aluOpMSub, rd, operandNR(rd), rm, rn, _64bit)  	m.insert(msub)  } -func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) { +func (m *machine) lowerIDiv(execCtxVReg, rd regalloc.VReg, rn, rm operand, _64bit, signed bool) {  	div := m.allocateInstr()  	if signed { @@ -1260,7 +1260,7 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi  		// We need to check the signed overflow which happens iff "math.MinInt{32,64} / -1"  		minusOneCheck := m.allocateInstr()  		// Sets eq condition if rm == -1. -		minusOneCheck.asALU(aluOpAddS, operandNR(xzrVReg), rm, operandImm12(1, 0), _64bit) +		minusOneCheck.asALU(aluOpAddS, xzrVReg, rm, operandImm12(1, 0), _64bit)  		m.insert(minusOneCheck)  		ccmp := m.allocateInstr() @@ -1290,20 +1290,20 @@ func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, c  func (m *machine) lowerFcopysign(x, y, ret ssa.Value) {  	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -	var tmpI, tmpF operand +	var tmpI, tmpF regalloc.VReg  	_64 := x.Type() == ssa.TypeF64  	if _64 { -		tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) -		tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) +		tmpF = m.compiler.AllocateVReg(ssa.TypeF64) +		tmpI = m.compiler.AllocateVReg(ssa.TypeI64)  	} else { -		tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF32)) -		tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) +		tmpF = m.compiler.AllocateVReg(ssa.TypeF32) +		tmpI = m.compiler.AllocateVReg(ssa.TypeI32)  	}  	rd := m.compiler.VRegOf(ret) -	m.lowerFcopysignImpl(operandNR(rd), rn, rm, tmpI, tmpF, _64) +	m.lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF, _64)  } -func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool) { +func (m *machine) lowerFcopysignImpl(rd regalloc.VReg, rn, rm operand, tmpI, tmpF regalloc.VReg, _64bit bool) {  	// This is exactly the same code emitted by GCC for "__builtin_copysign":  	//  	//    mov     x0, -9223372036854775808 @@ -1313,26 +1313,26 @@ func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool  	setMSB := m.allocateInstr()  	if _64bit { -		m.lowerConstantI64(tmpI.nr(), math.MinInt64) -		setMSB.asMovToVec(tmpF, tmpI, vecArrangementD, vecIndex(0)) +		m.lowerConstantI64(tmpI, math.MinInt64) +		setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementD, vecIndex(0))  	} else { -		m.lowerConstantI32(tmpI.nr(), math.MinInt32) -		setMSB.asMovToVec(tmpF, tmpI, vecArrangementS, vecIndex(0)) +		m.lowerConstantI32(tmpI, math.MinInt32) +		setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementS, vecIndex(0))  	}  	m.insert(setMSB) -	tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeF64)) +	tmpReg := m.compiler.AllocateVReg(ssa.TypeF64)  	mov := m.allocateInstr() -	mov.asFpuMov64(tmpReg.nr(), rn.nr()) +	mov.asFpuMov64(tmpReg, rn.nr())  	m.insert(mov)  	vbit := m.allocateInstr() -	vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, tmpF, vecArrangement8B) +	vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, operandNR(tmpF), vecArrangement8B)  	m.insert(vbit)  	movDst := m.allocateInstr() -	movDst.asFpuMov64(rd.nr(), tmpReg.nr()) +	movDst.asFpuMov64(rd, tmpReg)  	m.insert(movDst)  } @@ -1340,7 +1340,7 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) {  	v, dstType := instr.BitcastData()  	srcType := v.Type()  	rn := 
m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone) -	rd := operandNR(m.compiler.VRegOf(instr.Return())) +	rd := m.compiler.VRegOf(instr.Return())  	srcInt := srcType.IsInt()  	dstInt := dstType.IsInt()  	switch { @@ -1371,14 +1371,14 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) {  func (m *machine) lowerFpuUniOp(op fpuUniOp, in, out ssa.Value) {  	rn := m.getOperand_NR(m.compiler.ValueDefinition(in), extModeNone) -	rd := operandNR(m.compiler.VRegOf(out)) +	rd := m.compiler.VRegOf(out)  	neg := m.allocateInstr()  	neg.asFpuRR(op, rd, rn, in.Type().Bits() == 64)  	m.insert(neg)  } -func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) { +func (m *machine) lowerFpuToInt(rd regalloc.VReg, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) {  	if !nonTrapping {  		// First of all, we have to clear the FPU flags.  		flagClear := m.allocateInstr() @@ -1405,7 +1405,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64  		// Check if the conversion was undefined by comparing the status with 1.  		// See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register  		alu := m.allocateInstr() -		alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true) +		alu.asALU(aluOpSubS, xzrVReg, operandNR(tmpReg), operandImm12(1, 0), true)  		m.insert(alu)  		// If it is not undefined, we can return the result. @@ -1429,7 +1429,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64  	}  } -func (m *machine) lowerIntToFpu(rd, rn operand, signed, src64bit, dst64bit bool) { +func (m *machine) lowerIntToFpu(rd regalloc.VReg, rn operand, signed, src64bit, dst64bit bool) {  	cvt := m.allocateInstr()  	cvt.asIntToFpu(rd, rn, signed, src64bit, dst64bit)  	m.insert(cvt) @@ -1456,7 +1456,7 @@ func (m *machine) lowerFpuBinOp(si *ssa.Instruction) {  	xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)  	rn := m.getOperand_NR(xDef, extModeNone)  	rm := m.getOperand_NR(yDef, extModeNone) -	rd := operandNR(m.compiler.VRegOf(si.Return())) +	rd := m.compiler.VRegOf(si.Return())  	instr.asFpuRRR(op, rd, rn, rm, x.Type().Bits() == 64)  	m.insert(instr)  } @@ -1482,7 +1482,7 @@ func (m *machine) lowerSubOrAdd(si *ssa.Instruction, add bool) {  	case !add && yNegated: // rn+rm = x-(-y) = x-y  		aop = aluOpAdd  	} -	rd := operandNR(m.compiler.VRegOf(si.Return())) +	rd := m.compiler.VRegOf(si.Return())  	alu := m.allocateInstr()  	alu.asALU(aop, rd, rn, rm, x.Type().Bits() == 64)  	m.insert(alu) @@ -1527,7 +1527,7 @@ func (m *machine) lowerIcmp(si *ssa.Instruction) {  	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext)  	rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), ext)  	alu := m.allocateInstr() -	alu.asALU(aluOpSubS, operandNR(xzrVReg), rn, rm, in64bit) +	alu.asALU(aluOpSubS, xzrVReg, rn, rm, in64bit)  	m.insert(alu)  	cset := m.allocateInstr() @@ -1542,7 +1542,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) {  	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -	rd := operandNR(m.compiler.VRegOf(si.Return())) +	rd := m.compiler.VRegOf(si.Return())  	switch flag {  	case eq: @@ -1554,7 +1554,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) {  		cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr)  		m.insert(cmp)  		not := m.allocateInstr() -		
not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B) +		not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B)  		m.insert(not)  	case ge:  		cmp := m.allocateInstr() @@ -1598,7 +1598,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {  	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -	rd := operandNR(m.compiler.VRegOf(si.Return())) +	rd := m.compiler.VRegOf(si.Return())  	switch flag {  	case eq: @@ -1610,7 +1610,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {  		cmp.asVecRRR(vecOpFcmeq, rd, rn, rm, arr)  		m.insert(cmp)  		not := m.allocateInstr() -		not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B) +		not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B)  		m.insert(not)  	case ge:  		cmp := m.allocateInstr() @@ -1631,7 +1631,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {  	}  } -func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool) { +func (m *machine) lowerVfpuToInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) {  	cvt := m.allocateInstr()  	if signed {  		cvt.asVecMisc(vecOpFcvtzs, rd, rn, arr) @@ -1643,15 +1643,15 @@ func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool  	if arr == vecArrangement2D {  		narrow := m.allocateInstr()  		if signed { -			narrow.asVecMisc(vecOpSqxtn, rd, rd, vecArrangement2S) +			narrow.asVecMisc(vecOpSqxtn, rd, operandNR(rd), vecArrangement2S)  		} else { -			narrow.asVecMisc(vecOpUqxtn, rd, rd, vecArrangement2S) +			narrow.asVecMisc(vecOpUqxtn, rd, operandNR(rd), vecArrangement2S)  		}  		m.insert(narrow)  	}  } -func (m *machine) lowerVfpuFromInt(rd, rn operand, arr vecArrangement, signed bool) { +func (m *machine) lowerVfpuFromInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) {  	cvt := m.allocateInstr()  	if signed {  		cvt.asVecMisc(vecOpScvtf, rd, rn, arr) @@ -1665,7 +1665,7 @@ func (m *machine) lowerShifts(si *ssa.Instruction, ext extMode, aluOp aluOp) {  	x, amount := si.Arg2()  	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext)  	rm := m.getOperand_ShiftImm_NR(m.compiler.ValueDefinition(amount), ext, x.Type().Bits()) -	rd := operandNR(m.compiler.VRegOf(si.Return())) +	rd := m.compiler.VRegOf(si.Return())  	alu := m.allocateInstr()  	alu.asALUShift(aluOp, rd, rn, rm, x.Type().Bits() == 64) @@ -1678,11 +1678,11 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult  	xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)  	rn := m.getOperand_NR(xDef, extModeNone) -	var rd operand +	var rd regalloc.VReg  	if ignoreResult { -		rd = operandNR(xzrVReg) +		rd = xzrVReg  	} else { -		rd = operandNR(m.compiler.VRegOf(si.Return())) +		rd = m.compiler.VRegOf(si.Return())  	}  	_64 := x.Type().Bits() == 64 @@ -1691,7 +1691,7 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult  		c := instr.ConstantVal()  		if isBitMaskImmediate(c, _64) {  			// Constant bit wise operations can be lowered to a single instruction. 
-			alu.asALUBitmaskImm(op, rd.nr(), rn.nr(), c, _64) +			alu.asALUBitmaskImm(op, rd, rn.nr(), c, _64)  			m.insert(alu)  			return  		} @@ -1709,25 +1709,25 @@ func (m *machine) lowerRotl(si *ssa.Instruction) {  	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -	var tmp operand +	var tmp regalloc.VReg  	if _64 { -		tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) +		tmp = m.compiler.AllocateVReg(ssa.TypeI64)  	} else { -		tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) +		tmp = m.compiler.AllocateVReg(ssa.TypeI32)  	} -	rd := operandNR(m.compiler.VRegOf(r)) +	rd := m.compiler.VRegOf(r)  	// Encode rotl as neg + rotr: neg is a sub against the zero-reg.  	m.lowerRotlImpl(rd, rn, rm, tmp, _64)  } -func (m *machine) lowerRotlImpl(rd, rn, rm, tmp operand, is64bit bool) { +func (m *machine) lowerRotlImpl(rd regalloc.VReg, rn, rm operand, tmp regalloc.VReg, is64bit bool) {  	// Encode rotl as neg + rotr: neg is a sub against the zero-reg.  	neg := m.allocateInstr()  	neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rm, is64bit)  	m.insert(neg)  	alu := m.allocateInstr() -	alu.asALU(aluOpRotR, rd, rn, tmp, is64bit) +	alu.asALU(aluOpRotR, rd, rn, operandNR(tmp), is64bit)  	m.insert(alu)  } @@ -1737,7 +1737,7 @@ func (m *machine) lowerRotr(si *ssa.Instruction) {  	xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)  	rn := m.getOperand_NR(xDef, extModeNone)  	rm := m.getOperand_NR(yDef, extModeNone) -	rd := operandNR(m.compiler.VRegOf(si.Return())) +	rd := m.compiler.VRegOf(si.Return())  	alu := m.allocateInstr()  	alu.asALU(aluOpRotR, rd, rn, rm, si.Return().Type().Bits() == 64) @@ -1797,7 +1797,7 @@ func (m *machine) lowerImul(x, y, result ssa.Value) {  	// TODO: if this comes before Add/Sub, we could merge it by putting it into the place of xzrVReg.  	
mul := m.allocateInstr() -	mul.asALURRRR(aluOpMAdd, operandNR(rd), rn, rm, operandNR(xzrVReg), x.Type().Bits() == 64) +	mul.asALURRRR(aluOpMAdd, rd, rn, rm, xzrVReg, x.Type().Bits() == 64)  	m.insert(mul)  } @@ -1849,22 +1849,22 @@ func (m *machine) lowerPopcnt(x, result ssa.Value) {  	//    mov x5, v0.d[0]     ;; finally we mov the result back to a GPR  	// -	rd := operandNR(m.compiler.VRegOf(result)) +	rd := m.compiler.VRegOf(result)  	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  	rf1 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))  	ins := m.allocateInstr() -	ins.asMovToVec(rf1, rn, vecArrangementD, vecIndex(0)) +	ins.asMovToVec(rf1.nr(), rn, vecArrangementD, vecIndex(0))  	m.insert(ins)  	rf2 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))  	cnt := m.allocateInstr() -	cnt.asVecMisc(vecOpCnt, rf2, rf1, vecArrangement16B) +	cnt.asVecMisc(vecOpCnt, rf2.nr(), rf1, vecArrangement16B)  	m.insert(cnt)  	rf3 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))  	uaddlv := m.allocateInstr() -	uaddlv.asVecLanes(vecOpUaddlv, rf3, rf2, vecArrangement8B) +	uaddlv.asVecLanes(vecOpUaddlv, rf3.nr(), rf2, vecArrangement8B)  	m.insert(uaddlv)  	mov := m.allocateInstr() @@ -1879,32 +1879,35 @@ func (m *machine) lowerExitWithCode(execCtxVReg regalloc.VReg, code wazevoapi.Ex  	loadExitCodeConst.asMOVZ(tmpReg1, uint64(code), 0, true)  	setExitCode := m.allocateInstr() -	setExitCode.asStore(operandNR(tmpReg1), -		addressMode{ -			kind: addressModeKindRegUnsignedImm12, -			rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), -		}, 32) +	mode := m.amodePool.Allocate() +	*mode = addressMode{ +		kind: addressModeKindRegUnsignedImm12, +		rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), +	} +	setExitCode.asStore(operandNR(tmpReg1), mode, 32)  	// In order to unwind the stack, we also need to push the current stack pointer:  	tmp2 := m.compiler.AllocateVReg(ssa.TypeI64)  	movSpToTmp := m.allocateInstr()  	movSpToTmp.asMove64(tmp2, spVReg)  	strSpToExecCtx := m.allocateInstr() -	strSpToExecCtx.asStore(operandNR(tmp2), -		addressMode{ -			kind: addressModeKindRegUnsignedImm12, -			rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), -		}, 64) +	mode2 := m.amodePool.Allocate() +	*mode2 = addressMode{ +		kind: addressModeKindRegUnsignedImm12, +		rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), +	} +	strSpToExecCtx.asStore(operandNR(tmp2), mode2, 64)  	// Also the address of this exit.  	tmp3 := m.compiler.AllocateVReg(ssa.TypeI64)  	currentAddrToTmp := m.allocateInstr()  	currentAddrToTmp.asAdr(tmp3, 0)  	storeCurrentAddrToExecCtx := m.allocateInstr() -	storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), -		addressMode{ -			kind: addressModeKindRegUnsignedImm12, -			rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), -		}, 64) +	mode3 := m.amodePool.Allocate() +	*mode3 = addressMode{ +		kind: addressModeKindRegUnsignedImm12, +		rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), +	} +	storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), mode3, 64)  	exitSeq := m.allocateInstr()  	exitSeq.asExitSequence(execCtxVReg) @@ -1937,7 +1940,7 @@ func (m *machine) lowerIcmpToFlag(x, y ssa.Value, signed bool) {  	alu.asALU(  		aluOpSubS,  		// We don't need the result, just need to set flags. 
-		operandNR(xzrVReg), +		xzrVReg,  		rn,  		rm,  		x.Type().Bits() == 64, @@ -2012,7 +2015,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {  		alu.asALU(  			aluOpSubS,  			// We don't need the result, just need to set flags. -			operandNR(xzrVReg), +			xzrVReg,  			rn,  			operandNR(xzrVReg),  			c.Type().Bits() == 64, @@ -2024,7 +2027,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {  	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)  	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone) -	rd := operandNR(m.compiler.VRegOf(result)) +	rd := m.compiler.VRegOf(result)  	switch x.Type() {  	case ssa.TypeI32, ssa.TypeI64:  		// csel rd, rn, rm, cc @@ -2041,10 +2044,10 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {  	}  } -func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) { +func (m *machine) lowerSelectVec(rc, rn, rm operand, rd regalloc.VReg) {  	// First check if `rc` is zero or not.  	checkZero := m.allocateInstr() -	checkZero.asALU(aluOpSubS, operandNR(xzrVReg), rc, operandNR(xzrVReg), false) +	checkZero.asALU(aluOpSubS, xzrVReg, rc, operandNR(xzrVReg), false)  	m.insert(checkZero)  	// Then use CSETM to set all bits to one if `rc` is zero. @@ -2054,7 +2057,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {  	m.insert(cset)  	// Then move the bits to the result vector register. -	tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)) +	tmp2 := m.compiler.AllocateVReg(ssa.TypeV128)  	dup := m.allocateInstr()  	dup.asVecDup(tmp2, operandNR(allOnesOrZero), vecArrangement2D)  	m.insert(dup) @@ -2067,7 +2070,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {  	// Finally, move the result to the destination register.  	mov2 := m.allocateInstr() -	mov2.asFpuMov128(rd.nr(), tmp2.nr()) +	mov2.asFpuMov128(rd, tmp2)  	m.insert(mov2)  } @@ -2099,28 +2102,28 @@ func (m *machine) lowerAtomicRmw(si *ssa.Instruction) {  	addr, val := si.Arg2()  	addrDef, valDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(val)  	rn := m.getOperand_NR(addrDef, extModeNone) -	rt := operandNR(m.compiler.VRegOf(si.Return())) +	rt := m.compiler.VRegOf(si.Return())  	rs := m.getOperand_NR(valDef, extModeNone)  	_64 := si.Return().Type().Bits() == 64 -	var tmp operand +	var tmp regalloc.VReg  	if _64 { -		tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64)) +		tmp = m.compiler.AllocateVReg(ssa.TypeI64)  	} else { -		tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32)) +		tmp = m.compiler.AllocateVReg(ssa.TypeI32)  	} -	m.lowerAtomicRmwImpl(op, rn, rs, rt, tmp, size, negateArg, flipArg, _64) +	m.lowerAtomicRmwImpl(op, rn.nr(), rs.nr(), rt, tmp, size, negateArg, flipArg, _64)  } -func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp operand, size uint64, negateArg, flipArg, dst64bit bool) { +func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp regalloc.VReg, size uint64, negateArg, flipArg, dst64bit bool) {  	switch {  	case negateArg:  		neg := m.allocateInstr() -		neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rs, dst64bit) +		neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit)  		m.insert(neg)  	case flipArg:  		flip := m.allocateInstr() -		flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), rs, dst64bit) +		flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit)  		m.insert(flip)  	default:  		tmp = rs @@ -2139,32 +2142,32 @@ func (m *machine) lowerAtomicCas(si *ssa.Instruction) {  	rn := m.getOperand_NR(addrDef, extModeNone)  	rt := 
m.getOperand_NR(replDef, extModeNone)  	rs := m.getOperand_NR(expDef, extModeNone) -	tmp := operandNR(m.compiler.AllocateVReg(si.Return().Type())) +	tmp := m.compiler.AllocateVReg(si.Return().Type())  	_64 := si.Return().Type().Bits() == 64  	// rs is overwritten by CAS, so we need to move it to the result register before the instruction  	// in case when it is used somewhere else.  	mov := m.allocateInstr()  	if _64 { -		mov.asMove64(tmp.nr(), rs.nr()) +		mov.asMove64(tmp, rs.nr())  	} else { -		mov.asMove32(tmp.nr(), rs.nr()) +		mov.asMove32(tmp, rs.nr())  	}  	m.insert(mov) -	m.lowerAtomicCasImpl(rn, tmp, rt, size) +	m.lowerAtomicCasImpl(rn.nr(), tmp, rt.nr(), size)  	mov2 := m.allocateInstr()  	rd := m.compiler.VRegOf(si.Return())  	if _64 { -		mov2.asMove64(rd, tmp.nr()) +		mov2.asMove64(rd, tmp)  	} else { -		mov2.asMove32(rd, tmp.nr()) +		mov2.asMove32(rd, tmp)  	}  	m.insert(mov2)  } -func (m *machine) lowerAtomicCasImpl(rn, rs, rt operand, size uint64) { +func (m *machine) lowerAtomicCasImpl(rn, rs, rt regalloc.VReg, size uint64) {  	cas := m.allocateInstr()  	cas.asAtomicCas(rn, rs, rt, size)  	m.insert(cas) @@ -2176,12 +2179,12 @@ func (m *machine) lowerAtomicLoad(si *ssa.Instruction) {  	addrDef := m.compiler.ValueDefinition(addr)  	rn := m.getOperand_NR(addrDef, extModeNone) -	rt := operandNR(m.compiler.VRegOf(si.Return())) +	rt := m.compiler.VRegOf(si.Return()) -	m.lowerAtomicLoadImpl(rn, rt, size) +	m.lowerAtomicLoadImpl(rn.nr(), rt, size)  } -func (m *machine) lowerAtomicLoadImpl(rn, rt operand, size uint64) { +func (m *machine) lowerAtomicLoadImpl(rn, rt regalloc.VReg, size uint64) {  	ld := m.allocateInstr()  	ld.asAtomicLoad(rn, rt, size)  	m.insert(ld) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go index 4842eaa38..fd0760d72 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go @@ -24,6 +24,14 @@ type (  	addressModeKind byte  ) +func resetAddressMode(a *addressMode) { +	a.kind = 0 +	a.rn = 0 +	a.rm = 0 +	a.extOp = 0 +	a.imm = 0 +} +  const (  	// addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended,  	// and then scaled by bits(type)/8. 
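The resetAddressMode helper introduced above is the reset callback for a new addressMode pool: instead of embedding addressMode values directly in instructions, this patch allocates them from a pool that is cleared once per compilation (see amodePool in machine.go further down). The following is a minimal, self-contained sketch of that reset-on-allocate pattern; the pool type and the int/byte field types are simplified stand-ins, not the actual wazevoapi.Pool or arm64 register types.

package main

import "fmt"

// addressMode mirrors the fields cleared by resetAddressMode; the real
// struct uses regalloc.VReg and extendOp, simplified here to int and byte.
type addressMode struct {
	kind, extOp byte
	rn, rm      int
	imm         int64
}

// resetAddressMode zeroes a pooled item so it never leaks state from a
// previously compiled function.
func resetAddressMode(a *addressMode) {
	a.kind = 0
	a.rn = 0
	a.rm = 0
	a.extOp = 0
	a.imm = 0
}

// pool hands out pointers to individually allocated items and reuses them
// after Reset; handed-out pointers stay valid because only the index slice grows.
type pool[T any] struct {
	allocated []*T
	next      int
	reset     func(*T)
}

func newPool[T any](reset func(*T)) pool[T] { return pool[T]{reset: reset} }

// Allocate returns a zeroed item, reusing storage from earlier rounds when possible.
func (p *pool[T]) Allocate() *T {
	if p.next == len(p.allocated) {
		p.allocated = append(p.allocated, new(T))
	}
	it := p.allocated[p.next]
	p.next++
	p.reset(it)
	return it
}

// Reset makes every previously allocated item available again.
func (p *pool[T]) Reset() { p.next = 0 }

func main() {
	amodes := newPool[addressMode](resetAddressMode)
	m := amodes.Allocate()
	*m = addressMode{kind: 1, rn: 2, imm: 16}
	fmt.Println(*m)
	amodes.Reset()                  // end of one compilation
	fmt.Println(*amodes.Allocate()) // same storage, zeroed again
}

In the real backend the pool is created in NewBackend with wazevoapi.NewPool[addressMode](resetAddressMode) and cleared in machine.Reset, so every *addressMode produced while lowering one function is recycled for the next.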
@@ -140,15 +148,17 @@ func (a addressMode) format(dstSizeBits byte) (ret string) {  	return  } -func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode { +func addressModePreOrPostIndex(m *machine, rn regalloc.VReg, imm int64, preIndex bool) *addressMode {  	if !offsetFitsInAddressModeKindRegSignedImm9(imm) {  		panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm))  	} +	mode := m.amodePool.Allocate()  	if preIndex { -		return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm} +		*mode = addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}  	} else { -		return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm} +		*mode = addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}  	} +	return mode  }  func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool { @@ -207,9 +217,9 @@ func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret  	amode := m.lowerToAddressMode(ptr, offset, size)  	load := m.allocateInstr()  	if signed { -		load.asSLoad(operandNR(ret), amode, size) +		load.asSLoad(ret, amode, size)  	} else { -		load.asULoad(operandNR(ret), amode, size) +		load.asULoad(ret, amode, size)  	}  	m.insert(load)  } @@ -221,11 +231,11 @@ func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.  	load := m.allocateInstr()  	switch typ {  	case ssa.TypeI32, ssa.TypeI64: -		load.asULoad(operandNR(dst), amode, typ.Bits()) +		load.asULoad(dst, amode, typ.Bits())  	case ssa.TypeF32, ssa.TypeF64: -		load.asFpuLoad(operandNR(dst), amode, typ.Bits()) +		load.asFpuLoad(dst, amode, typ.Bits())  	case ssa.TypeV128: -		load.asFpuLoad(operandNR(dst), amode, 128) +		load.asFpuLoad(dst, amode, 128)  	default:  		panic("TODO")  	} @@ -239,7 +249,7 @@ func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane,  	m.lowerConstantI64(offsetReg, int64(offset))  	addedBase := m.addReg64ToReg64(base, offsetReg) -	rd := operandNR(m.compiler.VRegOf(ret)) +	rd := m.compiler.VRegOf(ret)  	ld1r := m.allocateInstr()  	ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane)) @@ -258,7 +268,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) {  }  // lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions. -func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) { +func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode *addressMode) {  	// TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and  	// addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed  	// to support more efficient address resolution. @@ -272,32 +282,33 @@ func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte  // During the construction, this might emit additional instructions.  //  // Extracted as a separate function for easy testing. 
-func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) { +func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode *addressMode) { +	amode = m.amodePool.Allocate()  	switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); {  	case a64sExist && a32sExist:  		var base regalloc.VReg  		base = a64s.Dequeue()  		var a32 addend32  		a32 = a32s.Dequeue() -		amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext} +		*amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}  	case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):  		var base regalloc.VReg  		base = a64s.Dequeue() -		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset} +		*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}  		offset = 0  	case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):  		var base regalloc.VReg  		base = a64s.Dequeue() -		amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset} +		*amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}  		offset = 0  	case a64sExist:  		var base regalloc.VReg  		base = a64s.Dequeue()  		if !a64s.Empty() {  			index := a64s.Dequeue() -			amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */} +			*amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}  		} else { -			amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} +			*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}  		}  	case a32sExist:  		base32 := a32s.Dequeue() @@ -314,14 +325,14 @@ func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32],  		if !a32s.Empty() {  			index := a32s.Dequeue() -			amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext} +			*amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}  		} else { -			amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0} +			*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}  		}  	default: // Only static offsets.  		
tmpReg := m.compiler.AllocateVReg(ssa.TypeI64)  		m.lowerConstantI64(tmpReg, offset) -		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0} +		*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}  		offset = 0  	} @@ -411,13 +422,13 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {  	rd = m.compiler.AllocateVReg(ssa.TypeI64)  	alu := m.allocateInstr()  	if imm12Op, ok := asImm12Operand(uint64(c)); ok { -		alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true) +		alu.asALU(aluOpAdd, rd, operandNR(r), imm12Op, true)  	} else if imm12Op, ok = asImm12Operand(uint64(-c)); ok { -		alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true) +		alu.asALU(aluOpSub, rd, operandNR(r), imm12Op, true)  	} else {  		tmp := m.compiler.AllocateVReg(ssa.TypeI64)  		m.load64bitConst(c, tmp) -		alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true) +		alu.asALU(aluOpAdd, rd, operandNR(r), operandNR(tmp), true)  	}  	m.insert(alu)  	return @@ -426,7 +437,7 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {  func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {  	rd = m.compiler.AllocateVReg(ssa.TypeI64)  	alu := m.allocateInstr() -	alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true) +	alu.asALU(aluOpAdd, rd, operandNR(rn), operandNR(rm), true)  	m.insert(alu)  	return  } @@ -434,7 +445,7 @@ func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {  func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) {  	rd = m.compiler.AllocateVReg(ssa.TypeI64)  	alu := m.allocateInstr() -	alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true) +	alu.asALU(aluOpAdd, rd, operandNR(rn), operandER(rm, ext, 64), true)  	m.insert(alu)  	return  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go index b435d9ba9..5f584f928 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go @@ -21,6 +21,8 @@ type (  		regAlloc   regalloc.Allocator  		regAllocFn *backend.RegAllocFunction[*instruction, *machine] +		amodePool wazevoapi.Pool[addressMode] +  		// addendsWorkQueue is used during address lowering, defined here for reuse.  		addendsWorkQueue wazevoapi.Queue[ssa.Value]  		addends32        wazevoapi.Queue[addend32] @@ -105,6 +107,7 @@ func NewBackend() backend.Machine {  		spillSlots:        make(map[regalloc.VRegID]int64),  		executableContext: newExecutableContext(),  		regAlloc:          regalloc.NewAllocator(regInfo), +		amodePool:         wazevoapi.NewPool[addressMode](resetAddressMode),  	}  	return m  } @@ -149,6 +152,7 @@ func (m *machine) Reset() {  	m.maxRequiredStackSizeForCalls = 0  	m.executableContext.Reset()  	m.jmpTableTargets = m.jmpTableTargets[:0] +	m.amodePool.Reset()  }  // SetCurrentABI implements backend.Machine SetCurrentABI. 
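Beyond the pooling change, most hunks in this patch's arm64 lowering code follow one mechanical pattern: helpers whose destination was previously passed as an operand (always constructed with operandNR) now take a bare regalloc.VReg, and operandNR/.nr() conversions remain only where a genuine operand is required. A simplified before/after sketch, using stand-in types rather than the real wazevo operand and VReg definitions:

package main

import "fmt"

// VReg stands in for regalloc.VReg (a virtual register id).
type VReg uint64

// operand stands in for the backend operand type, which can also encode
// immediates and shifted/extended registers; only the register form is
// modeled here.
type operand struct{ reg VReg }

func operandNR(r VReg) operand { return operand{reg: r} }
func (o operand) nr() VReg     { return o.reg }

// Before: the destination is wrapped in an operand even though it can only
// ever be a register, so every use site unwraps it with .nr().
func lowerAddOld(rd, rn, rm operand) {
	fmt.Printf("add v%d, v%d, v%d\n", rd.nr(), rn.nr(), rm.nr())
}

// After: the destination is a plain VReg; the sources stay operands because
// they may still be immediates or shifted/extended registers.
func lowerAddNew(rd VReg, rn, rm operand) {
	fmt.Printf("add v%d, v%d, v%d\n", rd, rn.nr(), rm.nr())
}

func main() {
	rd, rn, rm := VReg(1), VReg(2), VReg(3)
	lowerAddOld(operandNR(rd), operandNR(rn), operandNR(rm))
	lowerAddNew(rd, operandNR(rn), operandNR(rm))
}

Sources keep the operand type because they can still be immediates or shifted/extended registers (operandImm12, operandShiftImm and operandER all appear in the hunks above); only destinations, which are always plain registers, are narrowed to regalloc.VReg.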
@@ -183,9 +187,8 @@ func (m *machine) allocateBrTarget() (nop *instruction, l label) {  	l = ectx.AllocateLabel()  	nop = m.allocateInstr()  	nop.asNop0WithLabel(l) -	pos := ectx.AllocateLabelPosition(l) +	pos := ectx.GetOrAllocateLabelPosition(l)  	pos.Begin, pos.End = nop, nop -	ectx.LabelPositions[l] = pos  	return  } @@ -209,7 +212,7 @@ func (m *machine) allocateNop() *instruction {  }  func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) { -	amode := &i.amode +	amode := i.getAmode()  	switch amode.kind {  	case addressModeKindResultStackSpace:  		amode.imm += ret0offset @@ -281,7 +284,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {  				switch cur.kind {  				case nop0:  					l := cur.nop0Label() -					if pos, ok := ectx.LabelPositions[l]; ok { +					if pos := ectx.LabelPositions[l]; pos != nil {  						pos.BinaryOffset = offset + size  					}  				case condBr: @@ -428,8 +431,10 @@ func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *  func (m *machine) Format() string {  	ectx := m.executableContext  	begins := map[*instruction]label{} -	for l, pos := range ectx.LabelPositions { -		begins[pos.Begin] = l +	for _, pos := range ectx.LabelPositions { +		if pos != nil { +			begins[pos.Begin] = pos.L +		}  	}  	irBlocks := map[label]ssa.BasicBlockID{} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go index 466fac464..d9032f921 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go @@ -70,7 +70,7 @@ func (m *machine) setupPrologue() {  		//                                          +-----------------+ <----- SP  		//                                             (low address)  		// -		_amode := addressModePreOrPostIndex(spVReg, +		_amode := addressModePreOrPostIndex(m, spVReg,  			-16,  // stack pointer must be 16-byte aligned.  			true, // Decrement before store.  		) @@ -159,7 +159,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc  		sizeOfArgRetReg = tmpRegVReg  		subSp := m.allocateInstr() -		subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true) +		subSp.asALU(aluOpSub, spVReg, operandNR(spVReg), operandNR(sizeOfArgRetReg), true)  		cur = linkInstr(cur, subSp)  	} else {  		sizeOfArgRetReg = xzrVReg @@ -168,7 +168,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc  	// Saves the return address (lr) and the size_of_arg_ret below the SP.  	// size_of_arg_ret is used for stack unwinding.  	pstr := m.allocateInstr() -	amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */) +	amode := addressModePreOrPostIndex(m, spVReg, -16, true /* decrement before store */)  	pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode)  	cur = linkInstr(cur, pstr)  	return cur @@ -182,7 +182,7 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {  	} else {  		frameSizeReg = xzrVReg  	} -	_amode := addressModePreOrPostIndex(spVReg, +	_amode := addressModePreOrPostIndex(m, spVReg,  		-16,  // stack pointer must be 16-byte aligned.  		true, // Decrement before store.  	
) @@ -213,7 +213,7 @@ func (m *machine) postRegAlloc() {  			m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]  		default:  			// Removes the redundant copy instruction. -			if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() { +			if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() {  				prev, next := cur.prev, cur.next  				// Remove the copy instruction.  				prev.next = next @@ -286,16 +286,16 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {  		for i := range m.clobberedRegs {  			vr := m.clobberedRegs[l-i] // reverse order to restore.  			load := m.allocateInstr() -			amode := addressModePreOrPostIndex(spVReg, +			amode := addressModePreOrPostIndex(m, spVReg,  				16,    // stack pointer must be 16-byte aligned.  				false, // Increment after store.  			)  			// TODO: pair loads to reduce the number of instructions.  			switch regTypeToRegisterSizeInBits(vr.RegType()) {  			case 64: // save int reg. -				load.asULoad(operandNR(vr), amode, 64) +				load.asULoad(vr, amode, 64)  			case 128: // save vector reg. -				load.asFpuLoad(operandNR(vr), amode, 128) +				load.asFpuLoad(vr, amode, 128)  			}  			cur = linkInstr(cur, load)  		} @@ -317,8 +317,8 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {  	//    SP----> +-----------------+  	ldr := m.allocateInstr() -	ldr.asULoad(operandNR(lrVReg), -		addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64) +	ldr.asULoad(lrVReg, +		addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)  	cur = linkInstr(cur, ldr)  	if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 { @@ -351,14 +351,14 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi  	if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok {  		// sub tmp, sp, #requiredStackSize  		sub := m.allocateInstr() -		sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true) +		sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), immm12op, true)  		cur = linkInstr(cur, sub)  	} else {  		// This case, we first load the requiredStackSize into the temporary register,  		cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)  		// Then subtract it.  		sub := m.allocateInstr() -		sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true) +		sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), operandNR(tmpRegVReg), true)  		cur = linkInstr(cur, sub)  	} @@ -366,16 +366,18 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi  	// ldr tmp2, [executionContext #StackBottomPtr]  	ldr := m.allocateInstr() -	ldr.asULoad(operandNR(tmp2), addressMode{ +	amode := m.amodePool.Allocate() +	*amode = addressMode{  		kind: addressModeKindRegUnsignedImm12,  		rn:   x0VReg, // execution context is always the first argument.  		
imm:  wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(), -	}, 64) +	} +	ldr.asULoad(tmp2, amode, 64)  	cur = linkInstr(cur, ldr)  	// subs xzr, tmp, tmp2  	subs := m.allocateInstr() -	subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true) +	subs.asALU(aluOpSubS, xzrVReg, operandNR(tmpRegVReg), operandNR(tmp2), true)  	cur = linkInstr(cur, subs)  	// b.ge #imm @@ -388,22 +390,25 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi  		// First load the requiredStackSize into the temporary register,  		cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)  		setRequiredStackSize := m.allocateInstr() -		setRequiredStackSize.asStore(operandNR(tmpRegVReg), -			addressMode{ -				kind: addressModeKindRegUnsignedImm12, -				// Execution context is always the first argument. -				rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(), -			}, 64) +		amode := m.amodePool.Allocate() +		*amode = addressMode{ +			kind: addressModeKindRegUnsignedImm12, +			// Execution context is always the first argument. +			rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(), +		} +		setRequiredStackSize.asStore(operandNR(tmpRegVReg), amode, 64)  		cur = linkInstr(cur, setRequiredStackSize)  	}  	ldrAddress := m.allocateInstr() -	ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{ +	amode2 := m.amodePool.Allocate() +	*amode2 = addressMode{  		kind: addressModeKindRegUnsignedImm12,  		rn:   x0VReg, // execution context is always the first argument  		imm:  wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(), -	}, 64) +	} +	ldrAddress.asULoad(tmpRegVReg, amode2, 64)  	cur = linkInstr(cur, ldrAddress)  	// Then jumps to the stack grow call sequence's address, meaning diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go index 1c8793b73..c7eb92cc2 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go @@ -91,7 +91,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft  	}  	offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) -	var amode addressMode +	var amode *addressMode  	cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)  	store := m.allocateInstr()  	store.asStore(operandNR(v), amode, typ.Bits()) @@ -116,16 +116,16 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af  	}  	offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size()) -	var amode addressMode +	var amode *addressMode  	cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)  	load := m.allocateInstr()  	switch typ {  	case ssa.TypeI32, ssa.TypeI64: -		load.asULoad(operandNR(v), amode, typ.Bits()) +		load.asULoad(v, amode, typ.Bits())  	case ssa.TypeF32, ssa.TypeF64: -		load.asFpuLoad(operandNR(v), amode, typ.Bits()) +		load.asFpuLoad(v, amode, typ.Bits())  	case ssa.TypeV128: -		load.asFpuLoad(operandNR(v), amode, 128) +		load.asFpuLoad(v, amode, 128)  	default:  		panic("TODO")  	} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go 
b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go index 3f36c84e5..655370786 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go @@ -35,7 +35,7 @@ type (  		iter                   int  		reversePostOrderBlocks []RegAllocBlock[I, m]  		// labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks. -		labelToRegAllocBlockIndex map[Label]int +		labelToRegAllocBlockIndex [] /* Label to */ int  		loopNestingForestRoots    []ssa.BasicBlock  	} @@ -56,10 +56,9 @@ type (  // NewRegAllocFunction returns a new RegAllocFunction.  func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] {  	return &RegAllocFunction[I, M]{ -		m:                         m, -		ssb:                       ssb, -		c:                         c, -		labelToRegAllocBlockIndex: make(map[Label]int), +		m:   m, +		ssb: ssb, +		c:   c,  	}  } @@ -74,6 +73,9 @@ func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end  		end:   end,  		id:    int(sb.ID()),  	}) +	if len(f.labelToRegAllocBlockIndex) <= int(l) { +		f.labelToRegAllocBlockIndex = append(f.labelToRegAllocBlockIndex, make([]int, int(l)-len(f.labelToRegAllocBlockIndex)+1)...) +	}  	f.labelToRegAllocBlockIndex[l] = i  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go index b4450d56f..eacb6a7ef 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go @@ -60,9 +60,8 @@ type (  		phiDefInstListPool       wazevoapi.Pool[phiDefInstList]  		// Followings are re-used during various places. -		blks             []Block -		reals            []RealReg -		currentOccupants regInUseSet +		blks  []Block +		reals []RealReg  		// Following two fields are updated while iterating the blocks in the reverse postorder.  		state       state @@ -755,7 +754,8 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  		killSet := a.reals[:0]  		// Gather the set of registers that will be used in the current instruction. -		for _, use := range instr.Uses(&a.vs) { +		uses := instr.Uses(&a.vs) +		for _, use := range uses {  			if use.IsRealReg() {  				r := use.RealReg()  				currentUsedSet = currentUsedSet.add(r) @@ -770,7 +770,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  			}  		} -		for i, use := range instr.Uses(&a.vs) { +		for i, use := range uses {  			if !use.IsRealReg() {  				vs := s.getVRegState(use.ID())  				killed := vs.lastUse == pc @@ -944,8 +944,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {  func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) {  	s := &a.state -	for i := 0; i < 64; i++ { -		allocated := RealReg(i) +	for allocated := RealReg(0); allocated < 64; allocated++ {  		if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.  			
continue  		} @@ -974,11 +973,10 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {  	bID := blk.ID()  	blkSt := a.getOrAllocateBlockState(bID)  	desiredOccupants := &blkSt.startRegs -	aliveOnRegVRegs := make(map[VReg]RealReg) -	for i := 0; i < 64; i++ { -		r := RealReg(i) -		if v := blkSt.startRegs.get(r); v.Valid() { -			aliveOnRegVRegs[v] = r +	var desiredOccupantsSet RegSet +	for i, v := range desiredOccupants { +		if v != VRegInvalid { +			desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i))  		}  	} @@ -987,56 +985,38 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {  	}  	s.currentBlockID = bID -	a.updateLiveInVRState(a.getOrAllocateBlockState(bID)) +	a.updateLiveInVRState(blkSt) -	currentOccupants := &a.currentOccupants  	for i := 0; i < preds; i++ { -		currentOccupants.reset()  		if i == blkSt.startFromPredIndex {  			continue  		} -		currentOccupantsRev := make(map[VReg]RealReg)  		pred := blk.Pred(i)  		predSt := a.getOrAllocateBlockState(pred.ID()) -		for ii := 0; ii < 64; ii++ { -			r := RealReg(ii) -			if v := predSt.endRegs.get(r); v.Valid() { -				if _, ok := aliveOnRegVRegs[v]; !ok { -					continue -				} -				currentOccupants.add(r, v) -				currentOccupantsRev[v] = r -			} -		}  		s.resetAt(predSt)  		// Finds the free registers if any.  		intTmp, floatTmp := VRegInvalid, VRegInvalid  		if intFree := s.findAllocatable( -			a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set, +			a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupantsSet,  		); intFree != RealRegInvalid {  			intTmp = FromRealReg(intFree, RegTypeInt)  		}  		if floatFree := s.findAllocatable( -			a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set, +			a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupantsSet,  		); floatFree != RealRegInvalid {  			floatTmp = FromRealReg(floatFree, RegTypeFloat)  		} -		if wazevoapi.RegAllocLoggingEnabled { -			fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) -		} - -		for ii := 0; ii < 64; ii++ { -			r := RealReg(ii) +		for r := RealReg(0); r < 64; r++ {  			desiredVReg := desiredOccupants.get(r)  			if !desiredVReg.Valid() {  				continue  			} -			currentVReg := currentOccupants.get(r) +			currentVReg := s.regsInUse.get(r)  			if desiredVReg.ID() == currentVReg.ID() {  				continue  			} @@ -1048,86 +1028,95 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {  			} else {  				tmpRealReg = floatTmp  			} -			a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ) +			a.reconcileEdge(f, r, pred, currentVReg, desiredVReg, tmpRealReg, typ)  		}  	}  } +// reconcileEdge reconciles the register state between the current block and the predecessor for the real register `r`. +// +//   - currentVReg is the current VReg value that sits on the register `r`. This can be VRegInvalid if the register is not used at the end of the predecessor. +//   - desiredVReg is the desired VReg value that should be on the register `r`. +//   - freeReg is the temporary register that can be used to swap the values, which may or may not be used. +//   - typ is the register type of the `r`.  func (a *Allocator) reconcileEdge(f Function,  	r RealReg,  	pred Block, -	currentOccupants *regInUseSet, -	currentOccupantsRev map[VReg]RealReg,  	currentVReg, desiredVReg VReg,  	freeReg VReg,  	typ RegType,  ) { +	// There are four cases to consider: +	// 1. currentVReg is valid, but desiredVReg is on the stack. +	// 2. 
Both currentVReg and desiredVReg are valid. +	// 3. Desired is on a different register than `r` and currentReg is not valid. +	// 4. Desired is on the stack and currentReg is not valid. +  	s := &a.state  	if currentVReg.Valid() { -		// Both are on reg. -		er, ok := currentOccupantsRev[desiredVReg] -		if !ok { +		desiredState := s.getVRegState(desiredVReg.ID()) +		er := desiredState.r +		if er == RealRegInvalid { +			// Case 1: currentVReg is valid, but desiredVReg is on the stack.  			if wazevoapi.RegAllocLoggingEnabled {  				fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n",  					desiredVReg.ID(), a.regInfo.RealRegName(r),  				)  			} -			// This case is that the desired value is on the stack, but currentVReg is on the target register. -			// We need to move the current value to the stack, and reload the desired value. +			// We need to move the current value to the stack, and reload the desired value into the register.  			// TODO: we can do better here.  			f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion()) -			delete(currentOccupantsRev, currentVReg) +			s.releaseRealReg(r)  			s.getVRegState(desiredVReg.ID()).recordReload(f, pred)  			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) -			currentOccupants.add(r, desiredVReg) -			currentOccupantsRev[desiredVReg] = r +			s.useRealReg(r, desiredVReg)  			return -		} - -		if wazevoapi.RegAllocLoggingEnabled { -			fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", -				desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), +		} else { +			// Case 2: Both currentVReg and desiredVReg are valid. +			if wazevoapi.RegAllocLoggingEnabled { +				fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", +					desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), +				) +			} +			// This case, we need to swap the values between the current and desired values. +			f.SwapBefore( +				currentVReg.SetRealReg(r), +				desiredVReg.SetRealReg(er), +				freeReg, +				pred.LastInstrForInsertion(),  			) -		} -		f.SwapBefore( -			currentVReg.SetRealReg(r), -			desiredVReg.SetRealReg(er), -			freeReg, -			pred.LastInstrForInsertion(), -		) -		s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) -		currentOccupantsRev[desiredVReg] = r -		currentOccupantsRev[currentVReg] = er -		currentOccupants.add(r, desiredVReg) -		currentOccupants.add(er, currentVReg) -		if wazevoapi.RegAllocLoggingEnabled { -			fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) +			s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) +			s.releaseRealReg(r) +			s.releaseRealReg(er) +			s.useRealReg(r, desiredVReg) +			s.useRealReg(er, currentVReg) +			if wazevoapi.RegAllocLoggingEnabled { +				fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) +			}  		}  	} else { -		// Desired is on reg, but currently the target register is not used.  		if wazevoapi.RegAllocLoggingEnabled {  			fmt.Printf("\t\tv%d is desired to be on %s, current not used\n",  				desiredVReg.ID(), a.regInfo.RealRegName(r),  			)  		} -		if currentReg, ok := currentOccupantsRev[desiredVReg]; ok { +		if currentReg := s.getVRegState(desiredVReg.ID()).r; currentReg != RealRegInvalid { +			// Case 3: Desired is on a different register than `r` and currentReg is not valid. 
+			// We simply need to move the desired value to the register.  			f.InsertMoveBefore(  				FromRealReg(r, typ),  				desiredVReg.SetRealReg(currentReg),  				pred.LastInstrForInsertion(),  			) -			currentOccupants.remove(currentReg) +			s.releaseRealReg(currentReg)  		} else { +			// Case 4: Both currentVReg and desiredVReg are not valid. +			// We simply need to reload the desired value into the register.  			s.getVRegState(desiredVReg.ID()).recordReload(f, pred)  			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())  		} -		currentOccupantsRev[desiredVReg] = r -		currentOccupants.add(r, desiredVReg) -	} - -	if wazevoapi.RegAllocLoggingEnabled { -		fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo)) +		s.useRealReg(r, desiredVReg)  	}  } @@ -1169,8 +1158,7 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) {  	}  	for pos != definingBlk {  		st := a.getOrAllocateBlockState(pos.ID()) -		for ii := 0; ii < 64; ii++ { -			rr := RealReg(ii) +		for rr := RealReg(0); rr < 64; rr++ {  			if st.startRegs.get(rr) == v {  				r = rr  				// Already in the register, so we can place the spill at the beginning of the block. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go index e9bf60661..04a8e8f4d 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go @@ -46,23 +46,24 @@ func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {  	}  } -type regInUseSet struct { -	set RegSet -	vrs [64]VReg +type regInUseSet [64]VReg + +func newRegInUseSet() regInUseSet { +	var ret regInUseSet +	ret.reset() +	return ret  }  func (rs *regInUseSet) reset() { -	rs.set = 0 -	for i := range rs.vrs { -		rs.vrs[i] = VRegInvalid +	for i := range rs { +		rs[i] = VRegInvalid  	}  }  func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused  	var ret []string -	for i := 0; i < 64; i++ { -		if rs.set&(1<<uint(i)) != 0 { -			vr := rs.vrs[i] +	for i, vr := range rs { +		if vr != VRegInvalid {  			ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID()))  		}  	} @@ -70,39 +71,28 @@ func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused  }  func (rs *regInUseSet) has(r RealReg) bool { -	if r >= 64 { -		return false -	} -	return rs.set&(1<<uint(r)) != 0 +	return r < 64 && rs[r] != VRegInvalid  }  func (rs *regInUseSet) get(r RealReg) VReg { -	if r >= 64 { -		return VRegInvalid -	} -	return rs.vrs[r] +	return rs[r]  }  func (rs *regInUseSet) remove(r RealReg) { -	if r >= 64 { -		return -	} -	rs.set &= ^(1 << uint(r)) -	rs.vrs[r] = VRegInvalid +	rs[r] = VRegInvalid  }  func (rs *regInUseSet) add(r RealReg, vr VReg) {  	if r >= 64 {  		return  	} -	rs.set |= 1 << uint(r) -	rs.vrs[r] = vr +	rs[r] = vr  }  func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) { -	for i := 0; i < 64; i++ { -		if rs.set&(1<<uint(i)) != 0 { -			f(RealReg(i), rs.vrs[i]) +	for i, vr := range rs { +		if vr != VRegInvalid { +			f(RealReg(i), vr)  		}  	}  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go index 3379c4dde..72ce44e26 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go +++ 
b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/call_engine.go @@ -2,7 +2,6 @@ package wazevo  import (  	"context" -	"encoding/binary"  	"fmt"  	"reflect"  	"runtime" @@ -310,15 +309,6 @@ func (c *callEngine) callWithStack(ctx context.Context, paramResultStack []uint6  				*argRes = uint64(0xffffffff) // = -1 in signed 32-bit integer.  			} else {  				*argRes = uint64(res) -				calleeOpaque := opaqueViewFromPtr(uintptr(unsafe.Pointer(c.execCtx.callerModuleContextPtr))) -				if mod.Source.MemorySection != nil { // Local memory. -					putLocalMemory(calleeOpaque, 8 /* local memory begins at 8 */, mem) -				} else { -					// Imported memory's owner at offset 16 of the callerModuleContextPtr. -					opaquePtr := uintptr(binary.LittleEndian.Uint64(calleeOpaque[16:])) -					importedMemOwner := opaqueViewFromPtr(opaquePtr) -					putLocalMemory(importedMemOwner, 8 /* local memory begins at 8 */, mem) -				}  			}  			c.execCtx.exitCode = wazevoapi.ExitCodeOK  			afterGoFunctionCallEntrypoint(c.execCtx.goCallReturnAddress, c.execCtxPtr, uintptr(unsafe.Pointer(c.execCtx.stackPointerBeforeGoCall)), c.execCtx.framePointerBeforeGoCall) @@ -525,14 +515,6 @@ func (c *callEngine) callerModuleInstance() *wasm.ModuleInstance {  	return moduleInstanceFromOpaquePtr(c.execCtx.callerModuleContextPtr)  } -func opaqueViewFromPtr(ptr uintptr) []byte { -	var opaque []byte -	sh := (*reflect.SliceHeader)(unsafe.Pointer(&opaque)) -	sh.Data = ptr -	setSliceLimits(sh, 24, 24) -	return opaque -} -  const callStackCeiling = uintptr(50000000) // in uint64 (8 bytes) == 400000000 bytes in total == 400mb.  func (c *callEngine) growStackWithGuarded() (newSP uintptr, newFP uintptr, err error) { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go index f7c0450ae..e49353dc8 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/engine_cache.go @@ -31,6 +31,13 @@ func fileCacheKey(m *wasm.Module) (ret filecache.Key) {  	s := sha256.New()  	s.Write(m.ID[:])  	s.Write(magic) +	// Write the CPU features so that we can cache the compiled module for the same CPU. +	// This prevents the incompatible CPU features from being used. +	cpu := platform.CpuFeatures.Raw() +	// Reuse the `ret` buffer to write the first 8 bytes of the CPU features so that we can avoid the allocation. +	binary.LittleEndian.PutUint64(ret[:8], cpu) +	s.Write(ret[:8]) +	// Finally, write the hash to the ret buffer.  	
s.Sum(ret[:0])  	return  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go index 873a35a55..42cc21dcd 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/frontend.go @@ -301,26 +301,7 @@ func (c *Compiler) declareWasmLocals(entry ssa.BasicBlock) {  		st := WasmTypeToSSAType(typ)  		variable := c.ssaBuilder.DeclareVariable(st)  		c.setWasmLocalVariable(wasm.Index(i)+localCount, variable) - -		zeroInst := c.ssaBuilder.AllocateInstruction() -		switch st { -		case ssa.TypeI32: -			zeroInst.AsIconst32(0) -		case ssa.TypeI64: -			zeroInst.AsIconst64(0) -		case ssa.TypeF32: -			zeroInst.AsF32const(0) -		case ssa.TypeF64: -			zeroInst.AsF64const(0) -		case ssa.TypeV128: -			zeroInst.AsVconst(0, 0) -		default: -			panic("TODO: " + wasm.ValueTypeName(typ)) -		} - -		c.ssaBuilder.InsertInstruction(zeroInst) -		value := zeroInst.Return() -		c.ssaBuilder.DefineVariable(variable, value, entry) +		c.ssaBuilder.InsertZeroValue(st)  	}  } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go index 5096a6365..ff963e605 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/frontend/lower.go @@ -1086,16 +1086,8 @@ func (c *Compiler) lowerCurrentOpcode() {  			break  		}  		variable := c.localVariable(index) -		if _, ok := c.m.NonStaticLocals[c.wasmLocalFunctionIndex][index]; ok { -			state.push(builder.MustFindValue(variable)) -		} else { -			// If a local is static, we can simply find it in the entry block which is either a function param -			// or a zero value. This fast pass helps to avoid the overhead of searching the entire function plus -			// avoid adding unnecessary block arguments. -			// TODO: I think this optimization should be done in a SSA pass like passRedundantPhiEliminationOpt, -			// 	but somehow there's some corner cases that it fails to optimize. 
-			state.push(builder.MustFindValueInBlk(variable, c.ssaBuilder.EntryBlock())) -		} +		state.push(builder.MustFindValue(variable)) +  	case wasm.OpcodeLocalSet:  		index := c.readI32u()  		if state.unreachable { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go index ba8f546c0..efa1b9bba 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go @@ -86,16 +86,6 @@ func newAlignedOpaque(size int) moduleContextOpaque {  	return buf  } -func putLocalMemory(opaque []byte, offset wazevoapi.Offset, mem *wasm.MemoryInstance) { -	s := uint64(len(mem.Buffer)) -	var b uint64 -	if len(mem.Buffer) > 0 { -		b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0]))) -	} -	binary.LittleEndian.PutUint64(opaque[offset:], b) -	binary.LittleEndian.PutUint64(opaque[offset+8:], s) -} -  func (m *moduleEngine) setupOpaque() {  	inst := m.module  	offsets := &m.parent.offsets @@ -106,7 +96,7 @@ func (m *moduleEngine) setupOpaque() {  	)  	if lm := offsets.LocalMemoryBegin; lm >= 0 { -		putLocalMemory(opaque, lm, inst.MemoryInstance) +		m.putLocalMemory()  	}  	// Note: imported memory is resolved in ResolveImportedFunction. @@ -227,6 +217,25 @@ func (m *moduleEngine) SetGlobalValue(i wasm.Index, lo, hi uint64) {  // OwnsGlobals implements the same method as documented on wasm.ModuleEngine.  func (m *moduleEngine) OwnsGlobals() bool { return true } +// MemoryGrown implements wasm.ModuleEngine. +func (m *moduleEngine) MemoryGrown() { +	m.putLocalMemory() +} + +// putLocalMemory writes the local memory buffer pointer and length to the opaque buffer. +func (m *moduleEngine) putLocalMemory() { +	mem := m.module.MemoryInstance +	offset := m.parent.offsets.LocalMemoryBegin + +	s := uint64(len(mem.Buffer)) +	var b uint64 +	if len(mem.Buffer) > 0 { +		b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0]))) +	} +	binary.LittleEndian.PutUint64(m.opaque[offset:], b) +	binary.LittleEndian.PutUint64(m.opaque[offset+8:], s) +} +  // ResolveImportedFunction implements wasm.ModuleEngine.  func (m *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) {  	executableOffset, moduleCtxOffset, typeIDOffset := m.parent.offsets.ImportedFunctionOffset(index) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go index 10b6b4b62..39627b989 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go @@ -49,21 +49,12 @@ type BasicBlock interface {  	// ReturnBlock returns ture if this block represents the function return.  	ReturnBlock() bool -	// FormatHeader returns the debug string of this block, not including instruction. -	FormatHeader(b Builder) string -  	// Valid is true if this block is still valid even after optimizations.  	Valid() bool  	// Sealed is true if this block has been sealed.  	Sealed() bool -	// BeginPredIterator returns the first predecessor of this block. -	BeginPredIterator() BasicBlock - -	// NextPredIterator returns the next predecessor of this block. -	NextPredIterator() BasicBlock -  	// Preds returns the number of predecessors of this block.  	
Preds() int @@ -88,10 +79,11 @@ type (  	basicBlock struct {  		id                      BasicBlockID  		rootInstr, currentInstr *Instruction -		params                  []blockParam -		predIter                int -		preds                   []basicBlockPredecessorInfo -		success                 []*basicBlock +		// params are Values that represent parameters to a basicBlock. +		// Each parameter can be considered as an output of PHI instruction in traditional SSA. +		params  []Value +		preds   []basicBlockPredecessorInfo +		success []*basicBlock  		// singlePred is the alias to preds[0] for fast lookup, and only set after Seal is called.  		singlePred *basicBlock  		// lastDefinitions maps Variable to its last definition in this block. @@ -116,11 +108,14 @@ type (  		// loopNestingForestChildren holds the children of this block in the loop nesting forest.  		// Non-empty if and only if this block is a loop header (i.e. loopHeader=true) -		loopNestingForestChildren []BasicBlock +		loopNestingForestChildren wazevoapi.VarLength[BasicBlock]  		// reversePostOrder is used to sort all the blocks in the function in reverse post order.  		// This is used in builder.LayoutBlocks. -		reversePostOrder int +		reversePostOrder int32 + +		// visited is used during various traversals. +		visited int32  		// child and sibling are the ones in the dominator tree.  		child, sibling *basicBlock @@ -128,15 +123,6 @@ type (  	// BasicBlockID is the unique ID of a basicBlock.  	BasicBlockID uint32 -	// blockParam implements Value and represents a parameter to a basicBlock. -	blockParam struct { -		// value is the Value that corresponds to the parameter in this block, -		// and can be considered as an output of PHI instruction in traditional SSA. -		value Value -		// typ is the type of the parameter. -		typ Type -	} -  	unknownValue struct {  		// variable is the variable that this unknownValue represents.  		variable Variable @@ -145,6 +131,9 @@ type (  	}  ) +// basicBlockVarLengthNil is the default nil value for basicBlock.loopNestingForestChildren. +var basicBlockVarLengthNil = wazevoapi.NewNilVarLength[BasicBlock]() +  const basicBlockIDReturnBlock = 0xffffffff  // Name implements BasicBlock.Name. @@ -190,13 +179,13 @@ func (bb *basicBlock) ReturnBlock() bool {  // AddParam implements BasicBlock.AddParam.  func (bb *basicBlock) AddParam(b Builder, typ Type) Value {  	paramValue := b.allocateValue(typ) -	bb.params = append(bb.params, blockParam{typ: typ, value: paramValue}) +	bb.params = append(bb.params, paramValue)  	return paramValue  }  // addParamOn adds a parameter to this block whose value is already allocated. -func (bb *basicBlock) addParamOn(typ Type, value Value) { -	bb.params = append(bb.params, blockParam{typ: typ, value: value}) +func (bb *basicBlock) addParamOn(value Value) { +	bb.params = append(bb.params, value)  }  // Params implements BasicBlock.Params. @@ -206,8 +195,7 @@ func (bb *basicBlock) Params() int {  // Param implements BasicBlock.Param.  func (bb *basicBlock) Param(i int) Value { -	p := &bb.params[i] -	return p.value +	return bb.params[i]  }  // Valid implements BasicBlock.Valid. @@ -248,22 +236,6 @@ func (bb *basicBlock) NumPreds() int {  	return len(bb.preds)  } -// BeginPredIterator implements BasicBlock.BeginPredIterator. -func (bb *basicBlock) BeginPredIterator() BasicBlock { -	bb.predIter = 0 -	return bb.NextPredIterator() -} - -// NextPredIterator implements BasicBlock.NextPredIterator. 
-func (bb *basicBlock) NextPredIterator() BasicBlock { -	if bb.predIter >= len(bb.preds) { -		return nil -	} -	pred := bb.preds[bb.predIter].blk -	bb.predIter++ -	return pred -} -  // Preds implements BasicBlock.Preds.  func (bb *basicBlock) Preds() int {  	return len(bb.preds) @@ -305,7 +277,8 @@ func resetBasicBlock(bb *basicBlock) {  	bb.unknownValues = bb.unknownValues[:0]  	bb.lastDefinitions = wazevoapi.ResetMap(bb.lastDefinitions)  	bb.reversePostOrder = -1 -	bb.loopNestingForestChildren = bb.loopNestingForestChildren[:0] +	bb.visited = 0 +	bb.loopNestingForestChildren = basicBlockVarLengthNil  	bb.loopHeader = false  	bb.sibling = nil  	bb.child = nil @@ -335,11 +308,11 @@ func (bb *basicBlock) addPred(blk BasicBlock, branch *Instruction) {  	pred.success = append(pred.success, bb)  } -// FormatHeader implements BasicBlock.FormatHeader. -func (bb *basicBlock) FormatHeader(b Builder) string { +// formatHeader returns the string representation of the header of the basicBlock. +func (bb *basicBlock) formatHeader(b Builder) string {  	ps := make([]string, len(bb.params))  	for i, p := range bb.params { -		ps[i] = p.value.formatWithType(b) +		ps[i] = p.formatWithType(b)  	}  	if len(bb.preds) > 0 { @@ -398,7 +371,7 @@ func (bb *basicBlock) String() string {  // LoopNestingForestChildren implements BasicBlock.LoopNestingForestChildren.  func (bb *basicBlock) LoopNestingForestChildren() []BasicBlock { -	return bb.loopNestingForestChildren +	return bb.loopNestingForestChildren.View()  }  // LoopHeader implements BasicBlock.LoopHeader. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go index 1fc84d2ea..0b700c4b1 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go @@ -54,9 +54,6 @@ type Builder interface {  	// MustFindValue searches the latest definition of the given Variable and returns the result.  	MustFindValue(variable Variable) Value -	// MustFindValueInBlk is the same as MustFindValue except it searches the latest definition from the given BasicBlock. -	MustFindValueInBlk(variable Variable, blk BasicBlock) Value -  	// FindValueInLinearPath tries to find the latest definition of the given Variable in the linear path to the current BasicBlock.  	// If it cannot find the definition, or it's not sealed yet, it returns ValueInvalid.  	FindValueInLinearPath(variable Variable) Value @@ -127,7 +124,11 @@ type Builder interface {  	// Idom returns the immediate dominator of the given BasicBlock.  	Idom(blk BasicBlock) BasicBlock +	// VarLengthPool returns the VarLengthPool of Value.  	VarLengthPool() *wazevoapi.VarLengthPool[Value] + +	// InsertZeroValue inserts a zero value constant instruction of the given type. +	InsertZeroValue(t Type)  }  // NewBuilder returns a new Builder implementation. 
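The BeginPredIterator/NextPredIterator pair and its predIter cursor are removed above, so predecessor walks become plain index loops over Preds. A tiny self-contained sketch of that pattern, using a stand-in block type rather than ssa.BasicBlock (the index-based Pred accessor is assumed here; the type and names below are illustrative only, not wazero code):

package main

import "fmt"

// block stands in for a basic block in this sketch; it mirrors the
// index-based Preds/Pred accessors that replace the removed iterator.
type block struct {
	id    int
	preds []*block
}

func (b *block) Preds() int        { return len(b.preds) }
func (b *block) Pred(i int) *block { return b.preds[i] }

func main() {
	b1, b2 := &block{id: 1}, &block{id: 2}
	join := &block{id: 3, preds: []*block{b1, b2}}
	// Index-based traversal: no per-block iteration cursor to reset between walks.
	for i := 0; i < join.Preds(); i++ {
		fmt.Println("pred of b3:", join.Pred(i).id)
	}
}

Dropping the cursor also removes hidden iteration state from basicBlock, which is why the struct shrinks in the hunk above.
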
@@ -135,10 +136,10 @@ func NewBuilder() Builder {  	return &builder{  		instructionsPool:               wazevoapi.NewPool[Instruction](resetInstruction),  		basicBlocksPool:                wazevoapi.NewPool[basicBlock](resetBasicBlock), +		varLengthBasicBlockPool:        wazevoapi.NewVarLengthPool[BasicBlock](),  		varLengthPool:                  wazevoapi.NewVarLengthPool[Value](),  		valueAnnotations:               make(map[ValueID]string),  		signatures:                     make(map[SignatureID]*Signature), -		blkVisited:                     make(map[*basicBlock]int),  		valueIDAliases:                 make(map[ValueID]Value),  		redundantParameterIndexToValue: make(map[int]Value),  		returnBlk:                      &basicBlock{id: basicBlockIDReturnBlock}, @@ -177,12 +178,13 @@ type builder struct {  	dominators []*basicBlock  	sparseTree dominatorSparseTree +	varLengthBasicBlockPool wazevoapi.VarLengthPool[BasicBlock] +  	// loopNestingForestRoots are the roots of the loop nesting forest.  	loopNestingForestRoots []BasicBlock  	// The followings are used for optimization passes/deterministic compilation.  	instStack                      []*Instruction -	blkVisited                     map[*basicBlock]int  	valueIDToInstruction           []*Instruction  	blkStack                       []*basicBlock  	blkStack2                      []*basicBlock @@ -200,6 +202,32 @@ type builder struct {  	donePostBlockLayoutPasses bool  	currentSourceOffset SourceOffset + +	// zeros are the zero value constants for each type. +	zeros [typeEnd]Value +} + +// InsertZeroValue implements Builder.InsertZeroValue. +func (b *builder) InsertZeroValue(t Type) { +	if b.zeros[t].Valid() { +		return +	} +	zeroInst := b.AllocateInstruction() +	switch t { +	case TypeI32: +		zeroInst.AsIconst32(0) +	case TypeI64: +		zeroInst.AsIconst64(0) +	case TypeF32: +		zeroInst.AsF32const(0) +	case TypeF64: +		zeroInst.AsF64const(0) +	case TypeV128: +		zeroInst.AsVconst(0, 0) +	default: +		panic("TODO: " + t.String()) +	} +	b.zeros[t] = zeroInst.Insert(b).Return()  }  func (b *builder) VarLengthPool() *wazevoapi.VarLengthPool[Value] { @@ -215,10 +243,12 @@ func (b *builder) ReturnBlock() BasicBlock {  func (b *builder) Init(s *Signature) {  	b.nextVariable = 0  	b.currentSignature = s +	b.zeros = [typeEnd]Value{ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid}  	resetBasicBlock(b.returnBlk)  	b.instructionsPool.Reset()  	b.basicBlocksPool.Reset()  	b.varLengthPool.Reset() +	b.varLengthBasicBlockPool.Reset()  	b.donePreBlockLayoutPasses = false  	b.doneBlockLayout = false  	b.donePostBlockLayoutPasses = false @@ -231,11 +261,6 @@ func (b *builder) Init(s *Signature) {  	b.blkStack2 = b.blkStack2[:0]  	b.dominators = b.dominators[:0]  	b.loopNestingForestRoots = b.loopNestingForestRoots[:0] - -	for i := 0; i < b.basicBlocksPool.Allocated(); i++ { -		blk := b.basicBlocksPool.View(i) -		delete(b.blkVisited, blk) -	}  	b.basicBlocksPool.Reset()  	for v := ValueID(0); v < b.nextValueID; v++ { @@ -448,11 +473,6 @@ func (b *builder) findValueInLinearPath(variable Variable, blk *basicBlock) Valu  	return ValueInvalid  } -func (b *builder) MustFindValueInBlk(variable Variable, blk BasicBlock) Value { -	typ := b.definedVariableType(variable) -	return b.findValue(typ, variable, blk.(*basicBlock)) -} -  // MustFindValue implements Builder.MustFindValue.  
func (b *builder) MustFindValue(variable Variable) Value {  	typ := b.definedVariableType(variable) @@ -482,6 +502,9 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value  			value:    value,  		})  		return value +	} else if blk.EntryBlock() { +		// If this is the entry block, we reach the uninitialized variable which has zero value. +		return b.zeros[b.definedVariableType(variable)]  	}  	if pred := blk.singlePred; pred != nil { @@ -495,21 +518,42 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value  	// If this block has multiple predecessors, we have to gather the definitions,  	// and treat them as an argument to this block.  	// -	// The first thing is to define a new parameter to this block which may or may not be redundant, but -	// later we eliminate trivial params in an optimization pass. This must be done before finding the -	// definitions in the predecessors so that we can break the cycle. -	paramValue := blk.AddParam(b, typ) -	b.DefineVariable(variable, paramValue, blk) - -	// After the new param is added, we have to manipulate the original branching instructions -	// in predecessors so that they would pass the definition of `variable` as the argument to -	// the newly added PHI. +	// But before that, we have to check if the possible definitions are the same Value. +	tmpValue := b.allocateValue(typ) +	// Break the cycle by defining the variable with the tmpValue. +	b.DefineVariable(variable, tmpValue, blk) +	// Check all the predecessors if they have the same definition. +	uniqueValue := ValueInvalid  	for i := range blk.preds { -		pred := &blk.preds[i] -		value := b.findValue(typ, variable, pred.blk) -		pred.branch.addArgumentBranchInst(b, value) +		predValue := b.findValue(typ, variable, blk.preds[i].blk) +		if uniqueValue == ValueInvalid { +			uniqueValue = predValue +		} else if uniqueValue != predValue { +			uniqueValue = ValueInvalid +			break +		} +	} + +	if uniqueValue != ValueInvalid { +		// If all the predecessors have the same definition, we can use that value. +		b.DefineVariable(variable, uniqueValue, blk) +		b.alias(tmpValue, uniqueValue) +		return uniqueValue +	} else { +		// Otherwise, add the tmpValue to this block as a parameter which may or may not be redundant, but +		// later we eliminate trivial params in an optimization pass. This must be done before finding the +		// definitions in the predecessors so that we can break the cycle. +		blk.addParamOn(tmpValue) +		// After the new param is added, we have to manipulate the original branching instructions +		// in predecessors so that they would pass the definition of `variable` as the argument to +		// the newly added PHI. +		for i := range blk.preds { +			pred := &blk.preds[i] +			value := b.findValue(typ, variable, pred.blk) +			pred.branch.addArgumentBranchInst(b, value) +		} +		return tmpValue  	} -	return paramValue  }  // Seal implements Builder.Seal. 
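The reworked findValue above no longer adds a block parameter (phi) up front at every multi-predecessor join. It first defines a temporary value to break the lookup cycle, asks every predecessor for its definition, and only promotes the temporary to a real parameter when at least two distinct values come back; otherwise the temporary is aliased to the single shared value. A minimal sketch of just that decision, with plain ints standing in for ssa.Value (none of these names are wazero's):

package main

import "fmt"

const invalid = -1

// resolveAtJoin returns the one value every predecessor agrees on, or
// (invalid, true) when the join point genuinely needs a phi parameter.
func resolveAtJoin(predValues []int) (value int, needsPhi bool) {
	unique := invalid
	for _, v := range predValues {
		if unique == invalid {
			unique = v
		} else if unique != v {
			return invalid, true // disagreement: a real block parameter is required
		}
	}
	return unique, false // agreement: alias to that value, no phi needed
}

func main() {
	fmt.Println(resolveAtJoin([]int{3, 3, 3})) // 3 false -> reuse the shared value
	fmt.Println(resolveAtJoin([]int{3, 7}))    // -1 true -> add a block parameter
}

In the real pass, the "needs phi" branch is what appends the temporary via addParamOn and threads an argument through each predecessor's branch instruction, as the hunk shows.
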
@@ -523,7 +567,7 @@ func (b *builder) Seal(raw BasicBlock) {  	for _, v := range blk.unknownValues {  		variable, phiValue := v.variable, v.value  		typ := b.definedVariableType(variable) -		blk.addParamOn(typ, phiValue) +		blk.addParamOn(phiValue)  		for i := range blk.preds {  			pred := &blk.preds[i]  			predValue := b.findValue(typ, variable, pred.blk) @@ -566,7 +610,7 @@ func (b *builder) Format() string {  	}  	for bb := iterBegin(); bb != nil; bb = iterNext() {  		str.WriteByte('\n') -		str.WriteString(bb.FormatHeader(b)) +		str.WriteString(bb.formatHeader(b))  		str.WriteByte('\n')  		for cur := bb.Root(); cur != nil; cur = cur.Next() { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go index a2e986cd1..89ec34b7e 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go @@ -22,9 +22,9 @@ func (b *builder) RunPasses() {  func (b *builder) runPreBlockLayoutPasses() {  	passSortSuccessors(b)  	passDeadBlockEliminationOpt(b) -	passRedundantPhiEliminationOpt(b)  	// The result of passCalculateImmediateDominators will be used by various passes below.  	passCalculateImmediateDominators(b) +	passRedundantPhiEliminationOpt(b)  	passNopInstElimination(b)  	// TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. @@ -78,12 +78,11 @@ func (b *builder) runFinalizingPasses() {  // passDeadBlockEliminationOpt searches the unreachable blocks, and sets the basicBlock.invalid flag true if so.  func passDeadBlockEliminationOpt(b *builder) {  	entryBlk := b.entryBlk() -	b.clearBlkVisited()  	b.blkStack = append(b.blkStack, entryBlk)  	for len(b.blkStack) > 0 {  		reachableBlk := b.blkStack[len(b.blkStack)-1]  		b.blkStack = b.blkStack[:len(b.blkStack)-1] -		b.blkVisited[reachableBlk] = 0 // the value won't be used in this pass. +		reachableBlk.visited = 1  		if !reachableBlk.sealed && !reachableBlk.ReturnBlock() {  			panic(fmt.Sprintf("%s is not sealed", reachableBlk)) @@ -94,7 +93,7 @@ func passDeadBlockEliminationOpt(b *builder) {  		}  		for _, succ := range reachableBlk.success { -			if _, ok := b.blkVisited[succ]; ok { +			if succ.visited == 1 {  				continue  			}  			b.blkStack = append(b.blkStack, succ) @@ -102,13 +101,16 @@ func passDeadBlockEliminationOpt(b *builder) {  	}  	for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { -		if _, ok := b.blkVisited[blk]; !ok { +		if blk.visited != 1 {  			blk.invalid = true  		} +		blk.visited = 0  	}  }  // passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block). +// This requires the reverse post-order traversal to be calculated before calling this function, +// hence passCalculateImmediateDominators must be called before this.  func passRedundantPhiEliminationOpt(b *builder) {  	redundantParameterIndexes := b.ints[:0] // reuse the slice from previous iterations. @@ -118,15 +120,18 @@ func passRedundantPhiEliminationOpt(b *builder) {  	//  relatively small. For example, sqlite speedtest binary results in the large number of redundant PHIs,  	//  the maximum number of iteration was 22, which seems to be acceptable but not that small either since the  	//  complexity here is O(BlockNum * Iterations) at the worst case where BlockNum might be the order of thousands. 
+	//  -- Note -- +	// 	Currently, each iteration can run in any order of blocks, but it empirically converges quickly in practice when +	// 	running on the reverse post-order. It might be possible to optimize this further by using the dominator tree.  	for {  		changed := false -		_ = b.blockIteratorBegin() // skip entry block! +		_ = b.blockIteratorReversePostOrderBegin() // skip entry block!  		// Below, we intentionally use the named iteration variable name, as this comes with inevitable nested for loops! -		for blk := b.blockIteratorNext(); blk != nil; blk = b.blockIteratorNext() { +		for blk := b.blockIteratorReversePostOrderNext(); blk != nil; blk = b.blockIteratorReversePostOrderNext() {  			paramNum := len(blk.params)  			for paramIndex := 0; paramIndex < paramNum; paramIndex++ { -				phiValue := blk.params[paramIndex].value +				phiValue := blk.params[paramIndex]  				redundant := true  				nonSelfReferencingValue := ValueInvalid @@ -184,7 +189,7 @@ func passRedundantPhiEliminationOpt(b *builder) {  			// Still need to have the definition of the value of the PHI (previously as the parameter).  			for _, redundantParamIndex := range redundantParameterIndexes { -				phiValue := blk.params[redundantParamIndex].value +				phiValue := blk.params[redundantParamIndex]  				onlyValue := b.redundantParameterIndexToValue[redundantParamIndex]  				// Create an alias in this block from the only phi argument to the phi value.  				b.alias(phiValue, onlyValue) @@ -227,10 +232,10 @@ func passRedundantPhiEliminationOpt(b *builder) {  func passDeadCodeEliminationOpt(b *builder) {  	nvid := int(b.nextValueID)  	if nvid >= len(b.valueRefCounts) { -		b.valueRefCounts = append(b.valueRefCounts, make([]int, b.nextValueID)...) +		b.valueRefCounts = append(b.valueRefCounts, make([]int, nvid-len(b.valueRefCounts)+1)...)  	}  	if nvid >= len(b.valueIDToInstruction) { -		b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) +		b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, nvid-len(b.valueIDToInstruction)+1)...)  	}  	// First, we gather all the instructions with side effects. @@ -350,22 +355,10 @@ func (b *builder) incRefCount(id ValueID, from *Instruction) {  	b.valueRefCounts[id]++  } -// clearBlkVisited clears the b.blkVisited map so that we can reuse it for multiple places. -func (b *builder) clearBlkVisited() { -	b.blkStack2 = b.blkStack2[:0] -	for key := range b.blkVisited { -		b.blkStack2 = append(b.blkStack2, key) -	} -	for _, blk := range b.blkStack2 { -		delete(b.blkVisited, blk) -	} -	b.blkStack2 = b.blkStack2[:0] -} -  // passNopInstElimination eliminates the instructions which is essentially a no-op.  func passNopInstElimination(b *builder) {  	if int(b.nextValueID) >= len(b.valueIDToInstruction) { -		b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) +		b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, int(b.nextValueID)-len(b.valueIDToInstruction)+1)...)  	
}  	for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go index 9068180a0..584b5eade 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go @@ -23,8 +23,6 @@ import (  //  // This heuristic is done in maybeInvertBranches function.  func passLayoutBlocks(b *builder) { -	b.clearBlkVisited() -  	// We might end up splitting critical edges which adds more basic blocks,  	// so we store the currently existing basic blocks in nonSplitBlocks temporarily.  	// That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks. @@ -47,20 +45,20 @@ func passLayoutBlocks(b *builder) {  	for _, blk := range nonSplitBlocks {  		for i := range blk.preds {  			pred := blk.preds[i].blk -			if _, ok := b.blkVisited[pred]; ok || !pred.Valid() { +			if pred.visited == 1 || !pred.Valid() {  				continue  			} else if pred.reversePostOrder < blk.reversePostOrder {  				// This means the edge is critical, and this pred is the trampoline and yet to be inserted.  				// Split edge trampolines must come before the destination in reverse post-order.  				b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred) -				b.blkVisited[pred] = 0 // mark as inserted, the value is not used. +				pred.visited = 1 // mark as inserted.  			}  		}  		// Now that we've already added all the potential trampoline blocks incoming to this block,  		// we can add this block itself.  		b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk) -		b.blkVisited[blk] = 0 // mark as inserted, the value is not used. +		blk.visited = 1 // mark as inserted.  		if len(blk.success) < 2 {  			// There won't be critical edge originating from this block. @@ -116,7 +114,7 @@ func passLayoutBlocks(b *builder) {  			if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline {  				// This can be lowered as fallthrough at the end of the block.  				b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) -				b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. +				trampoline.visited = 1 // mark as inserted.  			} else {  				uninsertedTrampolines = append(uninsertedTrampolines, trampoline)  			} @@ -126,7 +124,7 @@ func passLayoutBlocks(b *builder) {  			if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself.  				// This means the critical edge was backward, so we insert after the current block immediately.  				b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) -				b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. +				trampoline.visited = 1 // mark as inserted.  			} // If the target is forward, we can wait to insert until the target is inserted.  		}  		uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block. 
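Several hunks above change the slice-growth pattern from append(s, make([]T, b.nextValueID)...) to an append of exactly nvid-len(s)+1 elements: the old form tacked on a full nextValueID-sized chunk whenever the bound check fired, over-allocating on repeated growth, while the new form appends just enough zero values for the highest index to become addressable. The same pattern as a small generic helper (the helper name is ours, not wazero's):

package main

import "fmt"

// growToFit extends s with zero values so that index i is valid, appending
// only the missing tail rather than a whole extra block of elements.
func growToFit[T any](s []T, i int) []T {
	if i >= len(s) {
		s = append(s, make([]T, i-len(s)+1)...)
	}
	return s
}

func main() {
	var refCounts []int
	refCounts = growToFit(refCounts, 10) // len becomes 11, so index 10 is usable
	refCounts[10]++
	fmt.Println(len(refCounts), refCounts[10]) // 11 1
}
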
@@ -142,8 +140,8 @@ func passLayoutBlocks(b *builder) {  	if wazevoapi.SSAValidationEnabled {  		for _, trampoline := range trampolines { -			if _, ok := b.blkVisited[trampoline]; !ok { -				panic("BUG: trampoline block not inserted: " + trampoline.FormatHeader(b)) +			if trampoline.visited != 1 { +				panic("BUG: trampoline block not inserted: " + trampoline.formatHeader(b))  			}  			trampoline.validate(b)  		} diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go index 50cb9c475..e8288c4bd 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go @@ -15,10 +15,6 @@ import (  // At the last of pass, this function also does the loop detection and sets the basicBlock.loop flag.  func passCalculateImmediateDominators(b *builder) {  	reversePostOrder := b.reversePostOrderedBasicBlocks[:0] -	exploreStack := b.blkStack[:0] -	b.clearBlkVisited() - -	entryBlk := b.entryBlk()  	// Store the reverse postorder from the entrypoint into reversePostOrder slice.  	// This calculation of reverse postorder is not described in the paper, @@ -28,14 +24,17 @@ func passCalculateImmediateDominators(b *builder) {  	// which is a reasonable assumption as long as SSA Builder is properly used.  	//  	// First we push blocks in postorder iteratively visit successors of the entry block. -	exploreStack = append(exploreStack, entryBlk) +	entryBlk := b.entryBlk() +	exploreStack := append(b.blkStack[:0], entryBlk) +	// These flags are used to track the state of the block in the DFS traversal. +	// We temporarily use the reversePostOrder field to store the state.  	const visitStateUnseen, visitStateSeen, visitStateDone = 0, 1, 2 -	b.blkVisited[entryBlk] = visitStateSeen +	entryBlk.visited = visitStateSeen  	for len(exploreStack) > 0 {  		tail := len(exploreStack) - 1  		blk := exploreStack[tail]  		exploreStack = exploreStack[:tail] -		switch b.blkVisited[blk] { +		switch blk.visited {  		case visitStateUnseen:  			// This is likely a bug in the frontend.  			panic("BUG: unsupported CFG") @@ -48,16 +47,18 @@ func passCalculateImmediateDominators(b *builder) {  				if succ.ReturnBlock() || succ.invalid {  					continue  				} -				if b.blkVisited[succ] == visitStateUnseen { -					b.blkVisited[succ] = visitStateSeen +				if succ.visited == visitStateUnseen { +					succ.visited = visitStateSeen  					exploreStack = append(exploreStack, succ)  				}  			}  			// Finally, we could pop this block once we pop all of its successors. -			b.blkVisited[blk] = visitStateDone +			blk.visited = visitStateDone  		case visitStateDone:  			// Note: at this point we push blk in postorder despite its name.  			reversePostOrder = append(reversePostOrder, blk) +		default: +			panic("BUG")  		}  	}  	// At this point, reversePostOrder has postorder actually, so we reverse it. @@ -67,7 +68,7 @@ func passCalculateImmediateDominators(b *builder) {  	}  	for i, blk := range reversePostOrder { -		blk.reversePostOrder = i +		blk.reversePostOrder = int32(i)  	}  	// Reuse the dominators slice if possible from the previous computation of function. 
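passCalculateImmediateDominators now keeps traversal state on each block itself (the new visited field, cycling through the unseen/seen/done states) instead of the old blkVisited map, which also makes the separate clearBlkVisited reset pass unnecessary. For reference, the same three-state iterative DFS on a plain adjacency list looks roughly like this (our own types, not the wazero blocks):

package main

import "fmt"

// reversePostOrder computes the reverse post-order of a graph with an
// iterative DFS using the same unseen/seen/done states as the pass above.
func reversePostOrder(succ [][]int, entry int) []int {
	const unseen, seen, done = 0, 1, 2
	state := make([]int, len(succ))
	stack := []int{entry}
	state[entry] = seen
	var post []int
	for len(stack) > 0 {
		n := stack[len(stack)-1]
		stack = stack[:len(stack)-1]
		switch state[n] {
		case seen:
			stack = append(stack, n) // revisit n after all of its successors
			for _, s := range succ[n] {
				if state[s] == unseen {
					state[s] = seen
					stack = append(stack, s)
				}
			}
			state[n] = done
		case done:
			post = append(post, n) // second visit: emit in post-order
		}
	}
	for i, j := 0, len(post)-1; i < j; i, j = i+1, j-1 {
		post[i], post[j] = post[j], post[i] // reverse the post-order
	}
	return post
}

func main() {
	// 0 -> {1, 2}, 1 -> {3}, 2 -> {3}
	succ := [][]int{{1, 2}, {3}, {3}, {}}
	fmt.Println(reversePostOrder(succ, 0)) // [0 1 2 3]
}

Storing the flag on the block replaces map lookups with a plain field read; the trade-off, visible in the other passes above, is that each pass must leave visited in a known state for the next one.
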
@@ -180,7 +181,7 @@ func passBuildLoopNestingForest(b *builder) {  			b.loopNestingForestRoots = append(b.loopNestingForestRoots, blk)  		} else if n == ent {  		} else if n.loopHeader { -			n.loopNestingForestChildren = append(n.loopNestingForestChildren, blk) +			n.loopNestingForestChildren = n.loopNestingForestChildren.Append(&b.varLengthBasicBlockPool, blk)  		}  	} @@ -193,7 +194,7 @@ func passBuildLoopNestingForest(b *builder) {  func printLoopNestingForest(root *basicBlock, depth int) {  	fmt.Println(strings.Repeat("\t", depth), "loop nesting forest root:", root.ID()) -	for _, child := range root.loopNestingForestChildren { +	for _, child := range root.loopNestingForestChildren.View() {  		fmt.Println(strings.Repeat("\t", depth+1), "child:", child.ID())  		if child.LoopHeader() {  			printLoopNestingForest(child.(*basicBlock), depth+2) @@ -202,10 +203,10 @@ func printLoopNestingForest(root *basicBlock, depth int) {  }  type dominatorSparseTree struct { -	time         int +	time         int32  	euler        []*basicBlock -	first, depth []int -	table        [][]int +	first, depth []int32 +	table        [][]int32  }  // passBuildDominatorTree builds the dominator tree for the function, and constructs builder.sparseTree. @@ -232,11 +233,11 @@ func passBuildDominatorTree(b *builder) {  	n := b.basicBlocksPool.Allocated()  	st := &b.sparseTree  	st.euler = append(st.euler[:0], make([]*basicBlock, 2*n-1)...) -	st.first = append(st.first[:0], make([]int, n)...) +	st.first = append(st.first[:0], make([]int32, n)...)  	for i := range st.first {  		st.first[i] = -1  	} -	st.depth = append(st.depth[:0], make([]int, 2*n-1)...) +	st.depth = append(st.depth[:0], make([]int32, 2*n-1)...)  	st.time = 0  	// Start building the sparse tree. @@ -244,9 +245,9 @@ func passBuildDominatorTree(b *builder) {  	st.buildSparseTable()  } -func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int) { +func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int32) {  	if wazevoapi.SSALoggingEnabled { -		fmt.Println(strings.Repeat("\t", height), "euler tour:", node.ID()) +		fmt.Println(strings.Repeat("\t", int(height)), "euler tour:", node.ID())  	}  	dt.euler[dt.time] = node  	dt.depth[dt.time] = height @@ -270,13 +271,13 @@ func (dt *dominatorSparseTree) buildSparseTable() {  	table := dt.table  	if n >= len(table) { -		table = append(table, make([][]int, n+1)...) +		table = append(table, make([][]int32, n-len(table)+1)...)  	}  	for i := range table {  		if len(table[i]) < k { -			table[i] = append(table[i], make([]int, k)...) +			table[i] = append(table[i], make([]int32, k-len(table[i]))...)  		} -		table[i][0] = i +		table[i][0] = int32(i)  	}  	for j := 1; 1<<j <= n; j++ { @@ -292,7 +293,7 @@ func (dt *dominatorSparseTree) buildSparseTable() {  }  // rmq performs a range minimum query on the sparse table. -func (dt *dominatorSparseTree) rmq(l, r int) int { +func (dt *dominatorSparseTree) rmq(l, r int32) int32 {  	table := dt.table  	depth := dt.depth  	j := int(math.Log2(float64(r - l + 1))) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go index e8c8cd9de..73daf4269 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/type.go @@ -21,6 +21,9 @@ const (  	// TypeV128 represents 128-bit SIMD vectors.  	TypeV128 + +	// -- Do not add new types after this line. 
---- +	typeEnd  )  // String implements fmt.Stringer. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go index 3149fdc9e..313e34f9a 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi/pool.go @@ -69,7 +69,7 @@ type IDedPool[T any] struct {  // NewIDedPool returns a new IDedPool.  func NewIDedPool[T any](resetFn func(*T)) IDedPool[T] { -	return IDedPool[T]{pool: NewPool[T](resetFn)} +	return IDedPool[T]{pool: NewPool[T](resetFn), maxIDEncountered: -1}  }  // GetOrAllocate returns the T with the given id. @@ -97,7 +97,7 @@ func (p *IDedPool[T]) Get(id int) *T {  // Reset resets the pool.  func (p *IDedPool[T]) Reset() {  	p.pool.Reset() -	for i := range p.idToItems { +	for i := 0; i <= p.maxIDEncountered; i++ {  		p.idToItems[i] = nil  	}  	p.maxIDEncountered = -1 diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go index 25d7d3fdc..0dc6ec19c 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid.go @@ -6,6 +6,9 @@ type CpuFeatureFlags interface {  	Has(cpuFeature CpuFeature) bool  	// HasExtra returns true when the specified extraFlag (represented as uint64) is supported  	HasExtra(cpuFeature CpuFeature) bool +	// Raw returns the raw bitset that represents CPU features used by wazero. This can be used for cache keying. +	// For now, we only use four features, so uint64 is enough. +	Raw() uint64  }  type CpuFeature uint64 @@ -17,9 +20,11 @@ const (  	CpuFeatureAmd64SSE4_1 CpuFeature = 1 << 19  	// CpuFeatureAmd64SSE4_2 is the flag to query CpuFeatureFlags.Has for SSEv4.2 capabilities on amd64  	CpuFeatureAmd64SSE4_2 CpuFeature = 1 << 20 +	// Note: when adding new features, ensure that the feature is included in CpuFeatureFlags.Raw.  )  const (  	// CpuExtraFeatureAmd64ABM is the flag to query CpuFeatureFlags.HasExtra for Advanced Bit Manipulation capabilities (e.g. LZCNT) on amd64  	CpuExtraFeatureAmd64ABM CpuFeature = 1 << 5 +	// Note: when adding new features, ensure that the feature is included in CpuFeatureFlags.Raw.  ) diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go index 8c9f1a9f3..fbdb53936 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go +++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_amd64.go @@ -2,10 +2,10 @@  package platform -// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods -var CpuFeatures CpuFeatureFlags = loadCpuFeatureFlags() +// CpuFeatures exposes the capabilities for this CPU, queried via the Has, HasExtra methods. +var CpuFeatures = loadCpuFeatureFlags() -// cpuFeatureFlags implements CpuFeatureFlags interface +// cpuFeatureFlags implements CpuFeatureFlags interface.  type cpuFeatureFlags struct {  	flags      uint64  	extraFlags uint64 @@ -15,13 +15,13 @@ type cpuFeatureFlags struct {  // implemented in impl_amd64.s  func cpuid(arg1, arg2 uint32) (eax, ebx, ecx, edx uint32) -// cpuidAsBitmap combines the result of invoking cpuid to uint64 bitmap +// cpuidAsBitmap combines the result of invoking cpuid to uint64 bitmap.  
 func cpuidAsBitmap(arg1, arg2 uint32) uint64 {
 	_ /* eax */, _ /* ebx */, ecx, edx := cpuid(arg1, arg2)
 	return (uint64(edx) << 32) | uint64(ecx)
 }

-// loadStandardRange load flags from the standard range, panics otherwise
+// loadStandardRange load flags from the standard range, panics otherwise.
 func loadStandardRange(id uint32) uint64 {
 	// ensure that the id is in the valid range, returned by cpuid(0,0)
 	maxRange, _, _, _ := cpuid(0, 0)
@@ -31,7 +31,7 @@ func loadStandardRange(id uint32) uint64 {
 	return cpuidAsBitmap(id, 0)
 }

-// loadStandardRange load flags from the extended range, panics otherwise
+// loadStandardRange load flags from the extended range, panics otherwise.
 func loadExtendedRange(id uint32) uint64 {
 	// ensure that the id is in the valid range, returned by cpuid(0x80000000,0)
 	maxRange, _, _, _ := cpuid(0x80000000, 0)
@@ -48,12 +48,32 @@ func loadCpuFeatureFlags() CpuFeatureFlags {
 	}
 }

-// Has implements the same method on the CpuFeatureFlags interface
+// Has implements the same method on the CpuFeatureFlags interface.
 func (f *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool {
 	return (f.flags & uint64(cpuFeature)) != 0
 }

-// HasExtra implements the same method on the CpuFeatureFlags interface
+// HasExtra implements the same method on the CpuFeatureFlags interface.
 func (f *cpuFeatureFlags) HasExtra(cpuFeature CpuFeature) bool {
 	return (f.extraFlags & uint64(cpuFeature)) != 0
 }
+
+// Raw implements the same method on the CpuFeatureFlags interface.
+func (f *cpuFeatureFlags) Raw() uint64 {
+	// Below, we only set the first 4 bits for the features we care about,
+	// instead of setting all the unnecessary bits obtained from the CPUID instruction.
+	var ret uint64
+	if f.Has(CpuFeatureAmd64SSE3) {
+		ret = 1 << 0
+	}
+	if f.Has(CpuFeatureAmd64SSE4_1) {
+		ret |= 1 << 1
+	}
+	if f.Has(CpuFeatureAmd64SSE4_2) {
+		ret |= 1 << 2
+	}
+	if f.HasExtra(CpuExtraFeatureAmd64ABM) {
+		ret |= 1 << 3
+	}
+	return ret
+}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go
index 8ae826d36..291bcea65 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/platform/cpuid_unsupported.go
@@ -4,11 +4,14 @@ package platform

 var CpuFeatures CpuFeatureFlags = &cpuFeatureFlags{}

-// cpuFeatureFlags implements CpuFeatureFlags for unsupported platforms
+// cpuFeatureFlags implements CpuFeatureFlags for unsupported platforms.
 type cpuFeatureFlags struct{}

-// Has implements the same method on the CpuFeatureFlags interface
+// Has implements the same method on the CpuFeatureFlags interface.
 func (c *cpuFeatureFlags) Has(cpuFeature CpuFeature) bool { return false }

-// HasExtra implements the same method on the CpuFeatureFlags interface
+// HasExtra implements the same method on the CpuFeatureFlags interface.
 func (c *cpuFeatureFlags) HasExtra(cpuFeature CpuFeature) bool { return false }
+
+// Raw implements the same method on the CpuFeatureFlags interface.
+func (c *cpuFeatureFlags) Raw() uint64 { return 0 }
diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go
index a61996d58..b0519003b 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unix.go
@@ -12,8 +12,6 @@ const (
 	mmapProtARM64 = syscall.PROT_READ | syscall.PROT_WRITE
 )

-const MmapSupported = true
-
 func munmapCodeSegment(code []byte) error {
 	return syscall.Munmap(code)
 }
diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go
index 27833db37..079aa643f 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_unsupported.go
@@ -9,8 +9,6 @@ import (

 var errUnsupported = fmt.Errorf("mmap unsupported on GOOS=%s. Use interpreter instead.", runtime.GOOS)

-const MmapSupported = false
-
 func munmapCodeSegment(code []byte) error {
 	panic(errUnsupported)
 }
diff --git a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go
index 69fcb6d6b..03a254d4a 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/platform/mmap_windows.go
@@ -21,8 +21,6 @@ const (
 	windows_PAGE_EXECUTE_READWRITE uintptr = 0x00000040
 )

-const MmapSupported = true
-
 func munmapCodeSegment(code []byte) error {
 	return freeMemory(code)
 }
diff --git a/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go
index 9a77205bb..fdbf1fde0 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/sysfs/file.go
@@ -38,9 +38,6 @@ func NewStdioFile(stdin bool, f fs.File) (fsapi.File, error) {
 }

 func OpenFile(path string, flag experimentalsys.Oflag, perm fs.FileMode) (*os.File, experimentalsys.Errno) {
-	if flag&experimentalsys.O_DIRECTORY != 0 && flag&(experimentalsys.O_WRONLY|experimentalsys.O_RDWR) != 0 {
-		return nil, experimentalsys.EISDIR // invalid to open a directory writeable
-	}
 	return openFile(path, flag, perm)
 }

diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go
index 58a458217..61a342ef2 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/engine.go
@@ -69,4 +69,7 @@ type ModuleEngine interface {
 	// FunctionInstanceReference returns Reference for the given Index for a FunctionInstance. The returned values are used by
 	// the initialization via ElementSegment.
 	FunctionInstanceReference(funcIndex Index) Reference
+
+	// MemoryGrown notifies the engine that the memory has grown.
+	MemoryGrown()
 }
diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go
index 8da689076..ce2c7254d 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go
@@ -67,11 +67,6 @@ func (m *Module) validateFunctionWithMaxStackValues(
 	declaredFunctionIndexes map[Index]struct{},
 	br *bytes.Reader,
 ) error {
-	nonStaticLocals := make(map[Index]struct{})
-	if len(m.NonStaticLocals) > 0 {
-		m.NonStaticLocals[idx] = nonStaticLocals
-	}
-
 	functionType := &m.TypeSection[m.FunctionSection[idx]]
 	code := &m.CodeSection[idx]
 	body := code.Body
@@ -357,7 +352,6 @@ func (m *Module) validateFunctionWithMaxStackValues(
 					return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))",
 						OpcodeLocalSetName, index, l)
 				}
-				nonStaticLocals[index] = struct{}{}
 				var expType ValueType
 				if index < inputLen {
 					expType = functionType.Params[index]
@@ -373,7 +367,6 @@ func (m *Module) validateFunctionWithMaxStackValues(
 					return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))",
 						OpcodeLocalTeeName, index, l)
 				}
-				nonStaticLocals[index] = struct{}{}
 				var expType ValueType
 				if index < inputLen {
 					expType = functionType.Params[index]
diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go
index 5cc5012da..947b16112 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go
@@ -59,11 +59,14 @@ type MemoryInstance struct {
 	// with a fixed weight of 1 and no spurious notifications.
 	waiters sync.Map

+	// ownerModuleEngine is the module engine that owns this memory instance.
+	ownerModuleEngine ModuleEngine
+
 	expBuffer experimental.LinearMemory
 }

 // NewMemoryInstance creates a new instance based on the parameters in the SectionIDMemory.
-func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator) *MemoryInstance {
+func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator, moduleEngine ModuleEngine) *MemoryInstance {
 	minBytes := MemoryPagesToBytesNum(memSec.Min)
 	capBytes := MemoryPagesToBytesNum(memSec.Cap)
 	maxBytes := MemoryPagesToBytesNum(memSec.Max)
@@ -89,12 +92,13 @@ func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator) *
 		buffer = make([]byte, minBytes, capBytes)
 	}
 	return &MemoryInstance{
-		Buffer:    buffer,
-		Min:       memSec.Min,
-		Cap:       memoryBytesNumToPages(uint64(cap(buffer))),
-		Max:       memSec.Max,
-		Shared:    memSec.IsShared,
-		expBuffer: expBuffer,
+		Buffer:            buffer,
+		Min:               memSec.Min,
+		Cap:               memoryBytesNumToPages(uint64(cap(buffer))),
+		Max:               memSec.Max,
+		Shared:            memSec.IsShared,
+		expBuffer:         expBuffer,
+		ownerModuleEngine: moduleEngine,
 	}
 }

@@ -247,14 +251,12 @@ func (m *MemoryInstance) Grow(delta uint32) (result uint32, ok bool) {
 			m.Buffer = buffer
 			m.Cap = newPages
 		}
-		return currentPages, true
 	} else if newPages > m.Cap { // grow the memory.
 		if m.Shared {
 			panic("shared memory cannot be grown, this is a bug in wazero")
 		}
 		m.Buffer = append(m.Buffer, make([]byte, MemoryPagesToBytesNum(delta))...)
 		m.Cap = newPages
-		return currentPages, true
 	} else { // We already have the capacity we need.
 		if m.Shared {
 			// We assume grow is called under a guest lock.
@@ -264,8 +266,9 @@ func (m *MemoryInstance) Grow(delta uint32) (result uint32, ok bool) {
 		} else {
 			m.Buffer = m.Buffer[:MemoryPagesToBytesNum(newPages)]
 		}
-		return currentPages, true
 	}
+	m.ownerModuleEngine.MemoryGrown()
+	return currentPages, true
 }

 // Pages implements the same method as documented on api.Memory.
diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go
index 68573b918..8369ad9ed 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go
@@ -185,9 +185,6 @@ type Module struct {
 	// as described in https://yurydelendik.github.io/webassembly-dwarf/, though it is not specified in the Wasm
 	// specification: https://github.com/WebAssembly/debugging/issues/1
 	DWARFLines *wasmdebug.DWARFLines
-
-	// NonStaticLocals collects the local indexes that will change its value through either local.get or local.tee.
-	NonStaticLocals []map[Index]struct{}
 }

 // ModuleID represents sha256 hash value uniquely assigned to Module.
@@ -366,8 +363,6 @@ func (m *Module) validateFunctions(enabledFeatures api.CoreFeatures, functions [
 	br := bytes.NewReader(nil)
 	// Also, we reuse the stacks across multiple function validations to reduce allocations.
 	vs := &stacks{}
-	// Non-static locals are gathered during validation and used in the down-stream compilation.
-	m.NonStaticLocals = make([]map[Index]struct{}, len(m.FunctionSection))
 	for idx, typeIndex := range m.FunctionSection {
 		if typeIndex >= typeCount {
 			return fmt.Errorf("invalid %s: type section index %d out of range", m.funcDesc(SectionIDFunction, Index(idx)), typeIndex)
@@ -655,7 +650,7 @@ func paramNames(localNames IndirectNameMap, funcIdx uint32, paramLen int) []stri
 func (m *ModuleInstance) buildMemory(module *Module, allocator experimental.MemoryAllocator) {
 	memSec := module.MemorySection
 	if memSec != nil {
-		m.MemoryInstance = NewMemoryInstance(memSec, allocator)
+		m.MemoryInstance = NewMemoryInstance(memSec, allocator, m.Engine)
 		m.MemoryInstance.definition = &module.MemoryDefinitionSection[0]
 	}
 }
diff --git a/vendor/modules.txt b/vendor/modules.txt
index d0fd99a6c..201dcdd5c 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -517,7 +517,7 @@ github.com/modern-go/concurrent
 # github.com/modern-go/reflect2 v1.0.2
 ## explicit; go 1.12
 github.com/modern-go/reflect2
-# github.com/ncruces/go-sqlite3 v0.16.1
+# github.com/ncruces/go-sqlite3 v0.16.2
 ## explicit; go 1.21
 github.com/ncruces/go-sqlite3
 github.com/ncruces/go-sqlite3/driver
@@ -833,7 +833,7 @@ github.com/tdewolff/parse/v2/strconv
 # github.com/technologize/otel-go-contrib v1.1.1
 ## explicit; go 1.17
 github.com/technologize/otel-go-contrib/otelginmetrics
-# github.com/tetratelabs/wazero v1.7.2
+# github.com/tetratelabs/wazero v1.7.3
 ## explicit; go 1.20
 github.com/tetratelabs/wazero
 github.com/tetratelabs/wazero/api
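Note: the wazero 1.7.3 hunks above add a Raw() method to the internal CpuFeatureFlags interface, packing only the four features the compiler cares about (SSE3, SSE4.1, SSE4.2, ABM) into the low bits of a uint64. The standalone sketch below merely mirrors that bit layout to show how such a bitmap can be decoded; it is a hypothetical helper, not part of wazero's API (the platform package is internal and not importable).

package main

import "fmt"

// decodeRawFeatures interprets a feature bitmap using the layout visible in
// the diff above: bit 0 = SSE3, bit 1 = SSE4.1, bit 2 = SSE4.2, bit 3 = ABM.
// Illustrative only; not a wazero function.
func decodeRawFeatures(raw uint64) []string {
	names := []string{"SSE3", "SSE4.1", "SSE4.2", "ABM"}
	var enabled []string
	for i, name := range names {
		if raw&(1<<uint(i)) != 0 {
			enabled = append(enabled, name)
		}
	}
	return enabled
}

func main() {
	// 0b0111: SSE3, SSE4.1 and SSE4.2 set, ABM clear.
	fmt.Println(decodeRawFeatures(0b0111))
}

A compact bitmap like this is the kind of value that is convenient in a compilation-cache key, since hosts with identical bits can share compiled artifacts; the diff does not show Raw()'s call sites, so that reading is an inference, not something confirmed here.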
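Note: the memory.go and engine.go hunks restructure MemoryInstance.Grow so that every successful branch falls through to a single m.ownerModuleEngine.MemoryGrown() call before returning, instead of returning early per branch, and the owning ModuleEngine is now threaded through NewMemoryInstance for that purpose. The sketch below shows that notify-on-grow shape in isolation; the engine and memory types are simplified stand-ins, not wazero's real ones.

package main

import "fmt"

const pageSize = 65536 // Wasm page size in bytes.

// engine is notified whenever a memory it owns grows.
type engine interface {
	MemoryGrown()
}

type printEngine struct{}

func (printEngine) MemoryGrown() { fmt.Println("engine notified: memory grew") }

// memory is a toy linear memory that reports growth to its owning engine.
type memory struct {
	buf   []byte
	owner engine
}

// Grow appends delta pages and notifies the owner exactly once from a single
// exit point, mirroring the control-flow change in the diff above.
func (m *memory) Grow(deltaPages uint32) (prevPages uint32) {
	prevPages = uint32(len(m.buf) / pageSize)
	m.buf = append(m.buf, make([]byte, int(deltaPages)*pageSize)...)
	m.owner.MemoryGrown()
	return prevPages
}

func main() {
	m := &memory{owner: printEngine{}}
	fmt.Println("previous pages:", m.Grow(2))
}

Presumably this lets an engine refresh any cached view of the buffer after reallocation, though the diff itself only shows the notification hook, not how the engines consume it.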
