1 files changed, 131 insertions, 15 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
index 27f33250e..cf2ae9c51 100644
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@@ -73,6 +73,7 @@ const (
 	AMD3DNOW                            // AMD 3DNOW
 	AMD3DNOWEXT                         // AMD 3DNowExt
 	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
+	AMXFP16                             // Tile computational operations on FP16 numbers
 	AMXINT8                             // Tile computational operations on 8-bit integers
 	AMXTILE                             // Tile architecture
 	AVX                                 // AVX functions
@@ -93,8 +94,11 @@ const (
 	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
 	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
 	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
+	AVXIFMA                             // AVX-IFMA instructions
+	AVXNECONVERT                        // AVX-NE-CONVERT instructions
 	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one
 	AVXVNNI                             // AVX (VEX encoded) VNNI neural network instructions
+	AVXVNNIINT8                         // AVX-VNNI-INT8 instructions
 	BMI1                                // Bit Manipulation Instruction Set 1
 	BMI2                                // Bit Manipulation Instruction Set 2
 	CETIBT                              // Intel CET Indirect Branch Tracking
@@ -103,15 +107,22 @@ const (
 	CLMUL                               // Carry-less Multiplication
 	CLZERO                              // CLZERO instruction supported
 	CMOV                                // i686 CMOV
+	CMPCCXADD                           // CMPCCXADD instructions
 	CMPSB_SCADBS_SHORT                  // Fast short CMPSB and SCASB
 	CMPXCHG8                            // CMPXCHG8 instruction
 	CPBOOST                             // Core Performance Boost
+	CPPC                                // AMD: Collaborative Processor Performance Control
 	CX16                                // CMPXCHG16B Instruction
+	EFER_LMSLE_UNS                      // AMD: Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ (must be zero)
 	ENQCMD                              // Enqueue Command
 	ERMS                                // Enhanced REP MOVSB/STOSB
 	F16C                                // Half-precision floating-point conversion
+	FLUSH_L1D                           // Flush L1D cache
 	FMA3                                // Intel FMA 3. Does not imply AVX.
 	FMA4                                // Bulldozer FMA4 functions
+	FP128                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
+	FP256                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
+	FSRM                                // Fast Short Rep Mov
 	FXSR                                // FXSAVE, FXRESTOR instructions, CR4 bit 9
 	FXSROPT                             // FXSAVE/FXRSTOR optimizations
 	GFNI                                // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
@@ -119,8 +130,14 @@ const (
 	HRESET                              // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
 	HTT                                 // Hyperthreading (enabled)
 	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
+	HYBRID_CPU                          // This part has CPUs of more than one type.
 	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
+	IA32_ARCH_CAP                       // IA32_ARCH_CAPABILITIES MSR (Intel)
+	IA32_CORE_CAP                       // IA32_CORE_CAPABILITIES MSR
 	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+	IBRS                                // AMD: Indirect Branch Restricted Speculation
+	IBRS_PREFERRED                      // AMD: IBRS is preferred over software solution
+	IBRS_PROVIDES_SMP                   // AMD: IBRS provides Same Mode Protection
 	IBS                                 // Instruction Based Sampling (AMD)
 	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
 	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
@@ -130,7 +147,11 @@ const (
 	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
 	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
 	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
+	IBS_FETCH_CTLX                      // AMD: IBS fetch control extended MSR supported
+	IBS_OPDATA4                         // AMD: IBS op data 4 MSR supported
+	IBS_OPFUSE                          // AMD: Indicates support for IbsOpFuse
 	IBS_PREVENTHOST                     // Disallowing IBS use by the host supported
+	IBS_ZEN4                            // AMD: Fetch and Op IBS support IBS extensions added with Zen4
 	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
 	INVLPGB                             // NVLPGB and TLBSYNC instruction supported
 	LAHF                                // LAHF/SAHF in long mode
@@ -138,13 +159,16 @@ const (
 	LBRVIRT                             // LBR virtualization
 	LZCNT                               // LZCNT instruction
 	MCAOVERFLOW                         // MCA overflow recovery support.
+	MCDT_NO                             // Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.
 	MCOMMIT                             // MCOMMIT instruction supported
+	MD_CLEAR                            // VERW clears CPU buffers
 	MMX                                 // standard MMX
 	MMXEXT                              // SSE integer functions or AMD MMX ext
 	MOVBE                               // MOVBE instruction (big-endian)
 	MOVDIR64B                           // Move 64 Bytes as Direct Store
 	MOVDIRI                             // Move Doubleword as Direct Store
 	MOVSB_ZL                            // Fast Zero-Length MOVSB
+	MOVU                                // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
 	MPX                                 // Intel MPX (Memory Protection Extensions)
 	MSRIRC                              // Instruction Retired Counter MSR available
 	MSR_PAGEFLUSH                       // Page Flush MSR available
@@ -153,6 +177,9 @@ const (
 	OSXSAVE                             // XSAVE enabled by OS
 	PCONFIG                             // PCONFIG for Intel Multi-Key Total Memory Encryption
 	POPCNT                              // POPCNT instruction
+	PPIN                                // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
+	PREFETCHI                           // PREFETCHIT0/1 instructions
+	PSFD                                // AMD: Predictive Store Forward Disable
 	RDPRU                               // RDPRU instruction supported
 	RDRAND                              // RDRAND instruction is available
 	RDSEED                              // RDSEED instruction is available
@@ -172,6 +199,8 @@ const (
 	SHA                                 // Intel SHA Extensions
 	SME                                 // AMD Secure Memory Encryption supported
 	SME_COHERENT                        // AMD Hardware cache coherency across encryption domains enforced
+	SPEC_CTRL_SSBD                      // Speculative Store Bypass Disable
+	SRBDS_CTRL                          // SRBDS mitigation MSR available
 	SSE                                 // SSE functions
 	SSE2                                // P4 SSE functions
 	SSE3                                // Prescott SSE3 functions
@@ -180,6 +209,7 @@ const (
 	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
 	SSSE3                               // Conroe SSSE3 functions
 	STIBP                               // Single Thread Indirect Branch Predictors
+	STIBP_ALWAYSON                      // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
 	STOSB_SHORT                         // Fast short STOSB
 	SUCCOR                              // Software uncorrectable error containment and recovery capability.
 	SVM                                 // AMD Secure Virtual Machine
@@ -192,8 +222,9 @@ const (
 	SYSCALL                             // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
 	SYSEE                               // SYSENTER and SYSEXIT instructions
 	TBM                                 // AMD Trailing Bit Manipulation
-	TOPEXT                              // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
+	TLB_FLUSH_NESTED                    // AMD: Flushing includes all the nested translations for guest translations
 	TME                                 // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
+	TOPEXT                              // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
 	TSCRATEMSR                          // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
 	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
 	VAES                                // Vector AES. AVX(512) versions requires additional checks.
@@ -358,7 +389,7 @@ func (c CPUInfo) Supports(ids ...FeatureID) bool {
 
 // Has allows for checking a single feature.
 // Should be inlined by the compiler.
-func (c CPUInfo) Has(id FeatureID) bool {
+func (c *CPUInfo) Has(id FeatureID) bool {
 	return c.featureSet.inSet(id)
 }
 
@@ -372,26 +403,47 @@ func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
 	return false
 }
 
+// Features is a set of features combined for a fast check using CPUInfo.HasAll.
+type Features *flagSet
+
+// CombineFeatures combines ids into one Features value for a close to constant time lookup.
+func CombineFeatures(ids ...FeatureID) Features {
+	var v flagSet
+	for _, id := range ids {
+		v.set(id)
+	}
+	return &v
+}
+
+// HasAll returns whether every feature combined in f is present.
+func (c *CPUInfo) HasAll(f Features) bool {
+	return c.featureSet.hasSetP(f)
+}
+
 // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
-var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
-var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
-var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
-var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
+var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)
+var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)
+var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
+var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
+var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
 
 // X64Level returns the microarchitecture level detected on the CPU.
 // If features are lacking or non x64 mode, 0 is returned.
 // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
 func (c CPUInfo) X64Level() int {
-	if c.featureSet.hasSet(level4Features) {
+	if !c.featureSet.hasOneOf(oneOfLevel) {
+		return 0
+	}
+	if c.featureSet.hasSetP(level4Features) {
 		return 4
 	}
-	if c.featureSet.hasSet(level3Features) {
+	if c.featureSet.hasSetP(level3Features) {
 		return 3
 	}
-	if c.featureSet.hasSet(level2Features) {
+	if c.featureSet.hasSetP(level2Features) {
 		return 2
 	}
-	if c.featureSet.hasSet(level1Features) {
+	if c.featureSet.hasSetP(level1Features) {
 		return 1
 	}
 	return 0
@@ -555,7 +607,7 @@ const flagMask = flagBits - 1
 // flagSet contains detected cpu features and characteristics in an array of flags
 type flagSet [(lastID + flagMask) / flagBits]flags
 
-func (s flagSet) inSet(feat FeatureID) bool {
+func (s *flagSet) inSet(feat FeatureID) bool {
 	return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
 }
 
@@ -585,7 +637,17 @@ func (s *flagSet) or(other flagSet) {
 }
 
 // hasSet returns whether all features are present.
-func (s flagSet) hasSet(other flagSet) bool {
+func (s *flagSet) hasSet(other flagSet) bool {
+	for i, v := range other[:] {
+		if s[i]&v != v {
+			return false
+		}
+	}
+	return true
+}
+
+// hasSetP returns whether all features in other are present; other is taken by pointer.
+func (s *flagSet) hasSetP(other *flagSet) bool {
 	for i, v := range other[:] {
 		if s[i]&v != v {
 			return false
@@ -594,8 +656,18 @@ func (s flagSet) hasSet(other flagSet) bool {
 	return true
 }
 
+// hasOneOf returns whether at least one feature set in other is also set in s.
+func (s *flagSet) hasOneOf(other *flagSet) bool {
+	for i, v := range other[:] {
+		if s[i]&v != 0 {
+			return true
+		}
+	}
+	return false
+}
+
 // nEnabled will return the number of enabled flags.
-func (s flagSet) nEnabled() (n int) {
+func (s *flagSet) nEnabled() (n int) {
 	for _, v := range s[:] {
 		n += bits.OnesCount64(uint64(v))
 	}
@@ -1093,21 +1165,36 @@ func support() flagSet {
 		fs.setIf(ecx&(1<<30) != 0, SGXLC)
 
 		// CPUID.(EAX=7, ECX=0).EDX
+		fs.setIf(edx&(1<<4) != 0, FSRM)
+		fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL)
+		fs.setIf(edx&(1<<10) != 0, MD_CLEAR)
 		fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
 		fs.setIf(edx&(1<<14) != 0, SERIALIZE)
+		fs.setIf(edx&(1<<15) != 0, HYBRID_CPU)
 		fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
 		fs.setIf(edx&(1<<18) != 0, PCONFIG)
 		fs.setIf(edx&(1<<20) != 0, CETIBT)
 		fs.setIf(edx&(1<<26) != 0, IBPB)
 		fs.setIf(edx&(1<<27) != 0, STIBP)
+		fs.setIf(edx&(1<<28) != 0, FLUSH_L1D)
+		fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP)
+		fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
+		fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
+
+		// CPUID.(EAX=7, ECX=1).EDX
+		fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8)
+		fs.setIf(edx&(1<<5) != 0, AVXNECONVERT)
+		fs.setIf(edx&(1<<14) != 0, PREFETCHI)
 
-		// CPUID.(EAX=7, ECX=1)
+		// CPUID.(EAX=7, ECX=1).EAX
 		eax1, _, _, _ := cpuidex(7, 1)
 		fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
+		fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
 		fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
 		fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
 		fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
 		fs.setIf(eax1&(1<<22) != 0, HRESET)
+		fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
 		fs.setIf(eax1&(1<<26) != 0, LAM)
 
 		// Only detect AVX-512 features if XGETBV is supported
@@ -1145,9 +1232,15 @@ func support() flagSet {
 				fs.setIf(edx&(1<<25) != 0, AMXINT8)
 				// eax1 = CPUID.(EAX=7, ECX=1).EAX
 				fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
+				fs.setIf(eax1&(1<<21) != 0, AMXFP16)
 			}
 		}
+
+		// CPUID.(EAX=7, ECX=2)
+		_, _, _, edx = cpuidex(7, 2)
+		fs.setIf(edx&(1<<5) != 0, MCDT_NO)
 	}
+
 	// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
 	// EAX
 	// Bit 00: XSAVEOPT is available.
@@ -1212,9 +1305,21 @@ func support() flagSet {
 
 	if maxExtendedFunction() >= 0x80000008 {
 		_, b, _, _ := cpuid(0x80000008)
+		fs.setIf(b&(1<<28) != 0, PSFD)
+		fs.setIf(b&(1<<27) != 0, CPPC)
+		fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD)
+		fs.setIf(b&(1<<23) != 0, PPIN)
+		fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED)
+		fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS)
+		fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP)
+		fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED)
+		fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON)
+		fs.setIf(b&(1<<15) != 0, STIBP)
+		fs.setIf(b&(1<<14) != 0, IBRS)
+		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
+		fs.setIf(b&(1<<12) != 0, IBPB)
 		fs.setIf((b&(1<<9)) != 0, WBNOINVD)
 		fs.setIf((b&(1<<8)) != 0, MCOMMIT)
-		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
 		fs.setIf((b&(1<<4)) != 0, RDPRU)
 		fs.setIf((b&(1<<3)) != 0, INVLPGB)
 		fs.setIf((b&(1<<1)) != 0, MSRIRC)
@@ -1235,6 +1340,13 @@ func support() flagSet {
 		fs.setIf((edx>>12)&1 == 1, SVMPFT)
 	}
 
+	if maxExtendedFunction() >= 0x8000001a {
+		eax, _, _, _ := cpuid(0x8000001a)
+		fs.setIf((eax>>0)&1 == 1, FP128)
+		fs.setIf((eax>>1)&1 == 1, MOVU)
+		fs.setIf((eax>>2)&1 == 1, FP256)
+	}
+
 	if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
 		eax, _, _, _ := cpuid(0x8000001b)
 		fs.setIf((eax>>0)&1 == 1, IBSFFV)
@@ -1245,6 +1357,10 @@ func support() flagSet {
 		fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
 		fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
 		fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
+		fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE)
+		fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX)
+		fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1.
+		fs.setIf((eax>>11)&1 == 1, IBS_ZEN4)
 	}
 
 	if maxExtendedFunction() >= 0x8000001f && vend == AMD {