diff options
Diffstat (limited to 'vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go')
-rw-r--r-- | vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go | 26 |
1 files changed, 18 insertions, 8 deletions
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go index 18c5f4252..ee0b453ca 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go @@ -3901,14 +3901,9 @@ func (ce *callEngine) callNativeFunc(ctx context.Context, m *wasm.ModuleInstance case operationKindV128Dot: x2Hi, x2Lo := ce.popValue(), ce.popValue() x1Hi, x1Lo := ce.popValue(), ce.popValue() - ce.pushValue( - uint64(uint32(int32(int16(x1Lo>>0))*int32(int16(x2Lo>>0))+int32(int16(x1Lo>>16))*int32(int16(x2Lo>>16)))) | - (uint64(uint32(int32(int16(x1Lo>>32))*int32(int16(x2Lo>>32))+int32(int16(x1Lo>>48))*int32(int16(x2Lo>>48)))) << 32), - ) - ce.pushValue( - uint64(uint32(int32(int16(x1Hi>>0))*int32(int16(x2Hi>>0))+int32(int16(x1Hi>>16))*int32(int16(x2Hi>>16)))) | - (uint64(uint32(int32(int16(x1Hi>>32))*int32(int16(x2Hi>>32))+int32(int16(x1Hi>>48))*int32(int16(x2Hi>>48)))) << 32), - ) + lo, hi := v128Dot(x1Hi, x1Lo, x2Hi, x2Lo) + ce.pushValue(lo) + ce.pushValue(hi) frame.pc++ case operationKindV128ITruncSatFromF: hi, lo := ce.popValue(), ce.popValue() @@ -4584,3 +4579,18 @@ func (ce *callEngine) callGoFuncWithStack(ctx context.Context, m *wasm.ModuleIns ce.stack = ce.stack[0 : len(ce.stack)-shrinkLen] } } + +// v128Dot performs a dot product of two 64-bit vectors. +// Note: for some reason (which I suspect is due to a bug in Go compiler's regalloc), +// inlining this function causes a bug which happens **only when** we run with -race AND arm64 AND Go 1.22. +func v128Dot(x1Hi, x1Lo, x2Hi, x2Lo uint64) (uint64, uint64) { + r1 := int32(int16(x1Lo>>0)) * int32(int16(x2Lo>>0)) + r2 := int32(int16(x1Lo>>16)) * int32(int16(x2Lo>>16)) + r3 := int32(int16(x1Lo>>32)) * int32(int16(x2Lo>>32)) + r4 := int32(int16(x1Lo>>48)) * int32(int16(x2Lo>>48)) + r5 := int32(int16(x1Hi>>0)) * int32(int16(x2Hi>>0)) + r6 := int32(int16(x1Hi>>16)) * int32(int16(x2Hi>>16)) + r7 := int32(int16(x1Hi>>32)) * int32(int16(x2Hi>>32)) + r8 := int32(int16(x1Hi>>48)) * int32(int16(x2Hi>>48)) + return uint64(uint32(r1+r2)) | (uint64(uint32(r3+r4)) << 32), uint64(uint32(r5+r6)) | (uint64(uint32(r7+r8)) << 32) +} |