summary refs log tree commit diff
path: root/vendor/github.com/klauspost/cpuid/v2/cpuid.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/cpuid/v2/cpuid.go')
-rw-r--r-- vendor/github.com/klauspost/cpuid/v2/cpuid.go 146
1 files changed, 131 insertions, 15 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
index 27f33250e..cf2ae9c51 100644
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@@ -73,6 +73,7 @@ const (
AMD3DNOW // AMD 3DNOW
AMD3DNOWEXT // AMD 3DNowExt
AMXBF16 // Tile computational operations on BFLOAT16 numbers
+ AMXFP16 // Tile computational operations on FP16 numbers
AMXINT8 // Tile computational operations on 8-bit integers
AMXTILE // Tile architecture
AVX // AVX functions
@@ -93,8 +94,11 @@ const (
AVX512VNNI // AVX-512 Vector Neural Network Instructions
AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
+ AVXIFMA // AVX-IFMA instructions
+ AVXNECONVERT // AVX-NE-CONVERT instructions
AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
+ AVXVNNIINT8 // AVX-VNNI-INT8 instructions
BMI1 // Bit Manipulation Instruction Set 1
BMI2 // Bit Manipulation Instruction Set 2
CETIBT // Intel CET Indirect Branch Tracking
@@ -103,15 +107,22 @@ const (
CLMUL // Carry-less Multiplication
CLZERO // CLZERO instruction supported
CMOV // i686 CMOV
+ CMPCCXADD // CMPCCXADD instructions
CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
CMPXCHG8 // CMPXCHG8 instruction
CPBOOST // Core Performance Boost
+ CPPC // AMD: Collaborative Processor Performance Control
CX16 // CMPXCHG16B Instruction
+ EFER_LMSLE_UNS // AMD: Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ (must be zero)
ENQCMD // Enqueue Command
ERMS // Enhanced REP MOVSB/STOSB
F16C // Half-precision floating-point conversion
+ FLUSH_L1D // Flush L1D cache
FMA3 // Intel FMA 3. Does not imply AVX.
FMA4 // Bulldozer FMA4 functions
+ FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
+ FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
+ FSRM // Fast Short Rep Mov
FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
FXSROPT // FXSAVE/FXRSTOR optimizations
GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
@@ -119,8 +130,14 @@ const (
HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
HTT // Hyperthreading (enabled)
HWA // Hardware assert supported. Indicates support for MSRC001_10
+ HYBRID_CPU // This part has CPUs of more than one type.
HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
+ IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
+ IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+ IBRS // AMD: Indirect Branch Restricted Speculation
+ IBRS_PREFERRED // AMD: IBRS is preferred over software solution
+ IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
IBS // Instruction Based Sampling (AMD)
IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
@@ -130,7 +147,11 @@ const (
IBSOPSAM // Instruction Based Sampling Feature (AMD)
IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
+ IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported
+ IBS_OPDATA4 // AMD: IBS op data 4 MSR supported
+ IBS_OPFUSE // AMD: Indicates support for IbsOpFuse
IBS_PREVENTHOST // Disallowing IBS use by the host supported
+ IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4
INT_WBINVD // WBINVD/WBNOINVD are interruptible.
INVLPGB // INVLPGB and TLBSYNC instructions supported
LAHF // LAHF/SAHF in long mode
@@ -138,13 +159,16 @@ const (
LBRVIRT // LBR virtualization
LZCNT // LZCNT instruction
MCAOVERFLOW // MCA overflow recovery support.
+ MCDT_NO // Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.
MCOMMIT // MCOMMIT instruction supported
+ MD_CLEAR // VERW clears CPU buffers
MMX // standard MMX
MMXEXT // SSE integer functions or AMD MMX ext
MOVBE // MOVBE instruction (big-endian)
MOVDIR64B // Move 64 Bytes as Direct Store
MOVDIRI // Move Doubleword as Direct Store
MOVSB_ZL // Fast Zero-Length MOVSB
+ MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
MPX // Intel MPX (Memory Protection Extensions)
MSRIRC // Instruction Retired Counter MSR available
MSR_PAGEFLUSH // Page Flush MSR available
@@ -153,6 +177,9 @@ const (
OSXSAVE // XSAVE enabled by OS
PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
POPCNT // POPCNT instruction
+ PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
+ PREFETCHI // PREFETCHIT0/1 instructions
+ PSFD // AMD: Predictive Store Forward Disable
RDPRU // RDPRU instruction supported
RDRAND // RDRAND instruction is available
RDSEED // RDSEED instruction is available
@@ -172,6 +199,8 @@ const (
SHA // Intel SHA Extensions
SME // AMD Secure Memory Encryption supported
SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
+ SPEC_CTRL_SSBD // Speculative Store Bypass Disable
+ SRBDS_CTRL // SRBDS mitigation MSR available
SSE // SSE functions
SSE2 // P4 SSE functions
SSE3 // Prescott SSE3 functions
@@ -180,6 +209,7 @@ const (
SSE4A // AMD Barcelona microarchitecture SSE4a instructions
SSSE3 // Conroe SSSE3 functions
STIBP // Single Thread Indirect Branch Predictors
+ STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
STOSB_SHORT // Fast short STOSB
SUCCOR // Software uncorrectable error containment and recovery capability.
SVM // AMD Secure Virtual Machine
@@ -192,8 +222,9 @@ const (
SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
SYSEE // SYSENTER and SYSEXIT instructions
TBM // AMD Trailing Bit Manipulation
- TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
+ TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
+ TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
TSXLDTRK // Intel TSX Suspend Load Address Tracking
VAES // Vector AES. AVX(512) versions requires additional checks.
@@ -358,7 +389,7 @@ func (c CPUInfo) Supports(ids ...FeatureID) bool {
// Has allows for checking a single feature.
// Should be inlined by the compiler.
-func (c CPUInfo) Has(id FeatureID) bool {
+func (c *CPUInfo) Has(id FeatureID) bool {
return c.featureSet.inSet(id)
}
@@ -372,26 +403,47 @@ func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
return false
}
+// Features contains several features combined for a fast check using
+// CPUInfo.HasAll.
+type Features *flagSet
+
+// CombineFeatures combines several features into a single set for a close to constant time lookup.
+func CombineFeatures(ids ...FeatureID) Features {
+ var v flagSet
+ for _, id := range ids {
+ v.set(id)
+ }
+ return &v
+}
+
+func (c *CPUInfo) HasAll(f Features) bool {
+ return c.featureSet.hasSetP(f)
+}
+
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
-var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
-var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
-var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
-var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
+var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)
+var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)
+var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
+var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
+var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
// X64Level returns the microarchitecture level detected on the CPU.
// If features are lacking or non x64 mode, 0 is returned.
// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
func (c CPUInfo) X64Level() int {
- if c.featureSet.hasSet(level4Features) {
+ if !c.featureSet.hasOneOf(oneOfLevel) {
+ return 0
+ }
+ if c.featureSet.hasSetP(level4Features) {
return 4
}
- if c.featureSet.hasSet(level3Features) {
+ if c.featureSet.hasSetP(level3Features) {
return 3
}
- if c.featureSet.hasSet(level2Features) {
+ if c.featureSet.hasSetP(level2Features) {
return 2
}
- if c.featureSet.hasSet(level1Features) {
+ if c.featureSet.hasSetP(level1Features) {
return 1
}
return 0
@@ -555,7 +607,7 @@ const flagMask = flagBits - 1
// flagSet contains detected cpu features and characteristics in an array of flags
type flagSet [(lastID + flagMask) / flagBits]flags
-func (s flagSet) inSet(feat FeatureID) bool {
+func (s *flagSet) inSet(feat FeatureID) bool {
return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
}
@@ -585,7 +637,17 @@ func (s *flagSet) or(other flagSet) {
}
// hasSet returns whether all features are present.
-func (s flagSet) hasSet(other flagSet) bool {
+func (s *flagSet) hasSet(other flagSet) bool {
+ for i, v := range other[:] {
+ if s[i]&v != v {
+ return false
+ }
+ }
+ return true
+}
+
+// hasSetP returns whether all features are present. Same as hasSet, but takes other by pointer.
+func (s *flagSet) hasSetP(other *flagSet) bool {
for i, v := range other[:] {
if s[i]&v != v {
return false
@@ -594,8 +656,18 @@ func (s flagSet) hasSet(other flagSet) bool {
return true
}
+// hasOneOf returns whether one or more features are present.
+func (s *flagSet) hasOneOf(other *flagSet) bool {
+ for i, v := range other[:] {
+ if s[i]&v != 0 {
+ return true
+ }
+ }
+ return false
+}
+
// nEnabled will return the number of enabled flags.
-func (s flagSet) nEnabled() (n int) {
+func (s *flagSet) nEnabled() (n int) {
for _, v := range s[:] {
n += bits.OnesCount64(uint64(v))
}
@@ -1093,21 +1165,36 @@ func support() flagSet {
fs.setIf(ecx&(1<<30) != 0, SGXLC)
// CPUID.(EAX=7, ECX=0).EDX
+ fs.setIf(edx&(1<<4) != 0, FSRM)
+ fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL)
+ fs.setIf(edx&(1<<10) != 0, MD_CLEAR)
fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
fs.setIf(edx&(1<<14) != 0, SERIALIZE)
+ fs.setIf(edx&(1<<15) != 0, HYBRID_CPU)
fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
fs.setIf(edx&(1<<18) != 0, PCONFIG)
fs.setIf(edx&(1<<20) != 0, CETIBT)
fs.setIf(edx&(1<<26) != 0, IBPB)
fs.setIf(edx&(1<<27) != 0, STIBP)
+ fs.setIf(edx&(1<<28) != 0, FLUSH_L1D)
+ fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP)
+ fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
+ fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
+
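+ // CPUID.(EAX=7, ECX=1).EDX -- NOTE(review): edx appears to still hold the (EAX=7, ECX=0) value here; cpuidex(7, 1) is only called below and its EDX result is discarded. TODO confirm and read EDX from that call instead.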
+ fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8)
+ fs.setIf(edx&(1<<5) != 0, AVXNECONVERT)
+ fs.setIf(edx&(1<<14) != 0, PREFETCHI)
- // CPUID.(EAX=7, ECX=1)
+ // CPUID.(EAX=7, ECX=1).EAX
eax1, _, _, _ := cpuidex(7, 1)
fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
+ fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
fs.setIf(eax1&(1<<22) != 0, HRESET)
+ fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
fs.setIf(eax1&(1<<26) != 0, LAM)
// Only detect AVX-512 features if XGETBV is supported
@@ -1145,9 +1232,15 @@ func support() flagSet {
fs.setIf(edx&(1<<25) != 0, AMXINT8)
// eax1 = CPUID.(EAX=7, ECX=1).EAX
fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
+ fs.setIf(eax1&(1<<21) != 0, AMXFP16)
}
}
+
+ // CPUID.(EAX=7, ECX=2)
+ _, _, _, edx = cpuidex(7, 2)
+ fs.setIf(edx&(1<<5) != 0, MCDT_NO)
}
+
// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
// EAX
// Bit 00: XSAVEOPT is available.
@@ -1212,9 +1305,21 @@ func support() flagSet {
if maxExtendedFunction() >= 0x80000008 {
_, b, _, _ := cpuid(0x80000008)
+ fs.setIf(b&(1<<28) != 0, PSFD)
+ fs.setIf(b&(1<<27) != 0, CPPC)
+ fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD)
+ fs.setIf(b&(1<<23) != 0, PPIN)
+ fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED)
+ fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS)
+ fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP)
+ fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED)
+ fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON)
+ fs.setIf(b&(1<<15) != 0, STIBP)
+ fs.setIf(b&(1<<14) != 0, IBRS)
+ fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
+ fs.setIf(b&(1<<12) != 0, IBPB)
fs.setIf((b&(1<<9)) != 0, WBNOINVD)
fs.setIf((b&(1<<8)) != 0, MCOMMIT)
- fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
fs.setIf((b&(1<<4)) != 0, RDPRU)
fs.setIf((b&(1<<3)) != 0, INVLPGB)
fs.setIf((b&(1<<1)) != 0, MSRIRC)
@@ -1235,6 +1340,13 @@ func support() flagSet {
fs.setIf((edx>>12)&1 == 1, SVMPFT)
}
+ if maxExtendedFunction() >= 0x8000001a {
+ eax, _, _, _ := cpuid(0x8000001a)
+ fs.setIf((eax>>0)&1 == 1, FP128)
+ fs.setIf((eax>>1)&1 == 1, MOVU)
+ fs.setIf((eax>>2)&1 == 1, FP256)
+ }
+
if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
eax, _, _, _ := cpuid(0x8000001b)
fs.setIf((eax>>0)&1 == 1, IBSFFV)
@@ -1245,6 +1357,10 @@ func support() flagSet {
fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
+ fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE)
+ fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX)
+ fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1.
+ fs.setIf((eax>>11)&1 == 1, IBS_ZEN4)
}
if maxExtendedFunction() >= 0x8000001f && vend == AMD {