diff options
Diffstat (limited to 'vendor/github.com/klauspost/cpuid/v2/cpuid.go')
-rw-r--r-- | vendor/github.com/klauspost/cpuid/v2/cpuid.go | 146 |
1 files changed, 131 insertions, 15 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go index 27f33250e..cf2ae9c51 100644 --- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go @@ -73,6 +73,7 @@ const ( AMD3DNOW // AMD 3DNOW AMD3DNOWEXT // AMD 3DNowExt AMXBF16 // Tile computational operations on BFLOAT16 numbers + AMXFP16 // Tile computational operations on FP16 numbers AMXINT8 // Tile computational operations on 8-bit integers AMXTILE // Tile architecture AVX // AVX functions @@ -93,8 +94,11 @@ const ( AVX512VNNI // AVX-512 Vector Neural Network Instructions AVX512VP2INTERSECT // AVX-512 Intersect for D/Q AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword + AVXIFMA // AVX-IFMA instructions + AVXNECONVERT // AVX-NE-CONVERT instructions AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one AVXVNNI // AVX (VEX encoded) VNNI neural network instructions + AVXVNNIINT8 // AVX-VNNI-INT8 instructions BMI1 // Bit Manipulation Instruction Set 1 BMI2 // Bit Manipulation Instruction Set 2 CETIBT // Intel CET Indirect Branch Tracking @@ -103,15 +107,22 @@ const ( CLMUL // Carry-less Multiplication CLZERO // CLZERO instruction supported CMOV // i686 CMOV + CMPCCXADD // CMPCCXADD instructions CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB CMPXCHG8 // CMPXCHG8 instruction CPBOOST // Core Performance Boost + CPPC // AMD: Collaborative Processor Performance Control CX16 // CMPXCHG16B Instruction + EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ ENQCMD // Enqueue Command ERMS // Enhanced REP MOVSB/STOSB F16C // Half-precision floating-point conversion + FLUSH_L1D // Flush L1D cache FMA3 // Intel FMA 3. Does not imply AVX. FMA4 // Bulldozer FMA4 functions + FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide + FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide + FSRM // Fast Short Rep Mov FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9 FXSROPT // FXSAVE/FXRSTOR optimizations GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. @@ -119,8 +130,14 @@ const ( HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR HTT // Hyperthreading (enabled) HWA // Hardware assert supported. Indicates support for MSRC001_10 + HYBRID_CPU // This part has CPUs of more than one type. HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors + IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel) + IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) + IBRS // AMD: Indirect Branch Restricted Speculation + IBRS_PREFERRED // AMD: IBRS is preferred over software solution + IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection IBS // Instruction Based Sampling (AMD) IBSBRNTRGT // Instruction Based Sampling Feature (AMD) IBSFETCHSAM // Instruction Based Sampling Feature (AMD) @@ -130,7 +147,11 @@ const ( IBSOPSAM // Instruction Based Sampling Feature (AMD) IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) + IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported + IBS_OPDATA4 // AMD: IBS op data 4 MSR supported + IBS_OPFUSE // AMD: Indicates support for IbsOpFuse IBS_PREVENTHOST // Disallowing IBS use by the host supported + IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4 INT_WBINVD // WBINVD/WBNOINVD are interruptible. INVLPGB // NVLPGB and TLBSYNC instruction supported LAHF // LAHF/SAHF in long mode @@ -138,13 +159,16 @@ const ( LBRVIRT // LBR virtualization LZCNT // LZCNT instruction MCAOVERFLOW // MCA overflow recovery support. + MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. MCOMMIT // MCOMMIT instruction supported + MD_CLEAR // VERW clears CPU buffers MMX // standard MMX MMXEXT // SSE integer functions or AMD MMX ext MOVBE // MOVBE instruction (big-endian) MOVDIR64B // Move 64 Bytes as Direct Store MOVDIRI // Move Doubleword as Direct Store MOVSB_ZL // Fast Zero-Length MOVSB + MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD MPX // Intel MPX (Memory Protection Extensions) MSRIRC // Instruction Retired Counter MSR available MSR_PAGEFLUSH // Page Flush MSR available @@ -153,6 +177,9 @@ const ( OSXSAVE // XSAVE enabled by OS PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption POPCNT // POPCNT instruction + PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled + PREFETCHI // PREFETCHIT0/1 instructions + PSFD // AMD: Predictive Store Forward Disable RDPRU // RDPRU instruction supported RDRAND // RDRAND instruction is available RDSEED // RDSEED instruction is available @@ -172,6 +199,8 @@ const ( SHA // Intel SHA Extensions SME // AMD Secure Memory Encryption supported SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced + SPEC_CTRL_SSBD // Speculative Store Bypass Disable + SRBDS_CTRL // SRBDS mitigation MSR available SSE // SSE functions SSE2 // P4 SSE functions SSE3 // Prescott SSE3 functions @@ -180,6 +209,7 @@ const ( SSE4A // AMD Barcelona microarchitecture SSE4a instructions SSSE3 // Conroe SSSE3 functions STIBP // Single Thread Indirect Branch Predictors + STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On STOSB_SHORT // Fast short STOSB SUCCOR // Software uncorrectable error containment and recovery capability. SVM // AMD Secure Virtual Machine @@ -192,8 +222,9 @@ const ( SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. SYSEE // SYSENTER and SYSEXIT instructions TBM // AMD Trailing Bit Manipulation - TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. + TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. + TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 TSXLDTRK // Intel TSX Suspend Load Address Tracking VAES // Vector AES. AVX(512) versions requires additional checks. @@ -358,7 +389,7 @@ func (c CPUInfo) Supports(ids ...FeatureID) bool { // Has allows for checking a single feature. // Should be inlined by the compiler. -func (c CPUInfo) Has(id FeatureID) bool { +func (c *CPUInfo) Has(id FeatureID) bool { return c.featureSet.inSet(id) } @@ -372,26 +403,47 @@ func (c CPUInfo) AnyOf(ids ...FeatureID) bool { return false } +// Features contains several features combined for a fast check using +// CpuInfo.HasAll +type Features *flagSet + +// CombineFeatures allows to combine several features for a close to constant time lookup. +func CombineFeatures(ids ...FeatureID) Features { + var v flagSet + for _, id := range ids { + v.set(id) + } + return &v +} + +func (c *CPUInfo) HasAll(f Features) bool { + return c.featureSet.hasSetP(f) +} + // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels -var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2) -var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) -var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) -var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) +var oneOfLevel = CombineFeatures(SYSEE, SYSCALL) +var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2) +var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) +var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) +var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) // X64Level returns the microarchitecture level detected on the CPU. // If features are lacking or non x64 mode, 0 is returned. // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels func (c CPUInfo) X64Level() int { - if c.featureSet.hasSet(level4Features) { + if !c.featureSet.hasOneOf(oneOfLevel) { + return 0 + } + if c.featureSet.hasSetP(level4Features) { return 4 } - if c.featureSet.hasSet(level3Features) { + if c.featureSet.hasSetP(level3Features) { return 3 } - if c.featureSet.hasSet(level2Features) { + if c.featureSet.hasSetP(level2Features) { return 2 } - if c.featureSet.hasSet(level1Features) { + if c.featureSet.hasSetP(level1Features) { return 1 } return 0 @@ -555,7 +607,7 @@ const flagMask = flagBits - 1 // flagSet contains detected cpu features and characteristics in an array of flags type flagSet [(lastID + flagMask) / flagBits]flags -func (s flagSet) inSet(feat FeatureID) bool { +func (s *flagSet) inSet(feat FeatureID) bool { return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 } @@ -585,7 +637,17 @@ func (s *flagSet) or(other flagSet) { } // hasSet returns whether all features are present. -func (s flagSet) hasSet(other flagSet) bool { +func (s *flagSet) hasSet(other flagSet) bool { + for i, v := range other[:] { + if s[i]&v != v { + return false + } + } + return true +} + +// hasSet returns whether all features are present. +func (s *flagSet) hasSetP(other *flagSet) bool { for i, v := range other[:] { if s[i]&v != v { return false @@ -594,8 +656,18 @@ func (s flagSet) hasSet(other flagSet) bool { return true } +// hasOneOf returns whether one or more features are present. +func (s *flagSet) hasOneOf(other *flagSet) bool { + for i, v := range other[:] { + if s[i]&v != 0 { + return true + } + } + return false +} + // nEnabled will return the number of enabled flags. -func (s flagSet) nEnabled() (n int) { +func (s *flagSet) nEnabled() (n int) { for _, v := range s[:] { n += bits.OnesCount64(uint64(v)) } @@ -1093,21 +1165,36 @@ func support() flagSet { fs.setIf(ecx&(1<<30) != 0, SGXLC) // CPUID.(EAX=7, ECX=0).EDX + fs.setIf(edx&(1<<4) != 0, FSRM) + fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL) + fs.setIf(edx&(1<<10) != 0, MD_CLEAR) fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) fs.setIf(edx&(1<<14) != 0, SERIALIZE) + fs.setIf(edx&(1<<15) != 0, HYBRID_CPU) fs.setIf(edx&(1<<16) != 0, TSXLDTRK) fs.setIf(edx&(1<<18) != 0, PCONFIG) fs.setIf(edx&(1<<20) != 0, CETIBT) fs.setIf(edx&(1<<26) != 0, IBPB) fs.setIf(edx&(1<<27) != 0, STIBP) + fs.setIf(edx&(1<<28) != 0, FLUSH_L1D) + fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP) + fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP) + fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD) + + // CPUID.(EAX=7, ECX=1).EDX + fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8) + fs.setIf(edx&(1<<5) != 0, AVXNECONVERT) + fs.setIf(edx&(1<<14) != 0, PREFETCHI) - // CPUID.(EAX=7, ECX=1) + // CPUID.(EAX=7, ECX=1).EAX eax1, _, _, _ := cpuidex(7, 1) fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) + fs.setIf(eax1&(1<<7) != 0, CMPCCXADD) fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) fs.setIf(eax1&(1<<22) != 0, HRESET) + fs.setIf(eax1&(1<<23) != 0, AVXIFMA) fs.setIf(eax1&(1<<26) != 0, LAM) // Only detect AVX-512 features if XGETBV is supported @@ -1145,9 +1232,15 @@ func support() flagSet { fs.setIf(edx&(1<<25) != 0, AMXINT8) // eax1 = CPUID.(EAX=7, ECX=1).EAX fs.setIf(eax1&(1<<5) != 0, AVX512BF16) + fs.setIf(eax1&(1<<21) != 0, AMXFP16) } } + + // CPUID.(EAX=7, ECX=2) + _, _, _, edx = cpuidex(7, 2) + fs.setIf(edx&(1<<5) != 0, MCDT_NO) } + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) // EAX // Bit 00: XSAVEOPT is available. @@ -1212,9 +1305,21 @@ func support() flagSet { if maxExtendedFunction() >= 0x80000008 { _, b, _, _ := cpuid(0x80000008) + fs.setIf(b&(1<<28) != 0, PSFD) + fs.setIf(b&(1<<27) != 0, CPPC) + fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD) + fs.setIf(b&(1<<23) != 0, PPIN) + fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED) + fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS) + fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP) + fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED) + fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON) + fs.setIf(b&(1<<15) != 0, STIBP) + fs.setIf(b&(1<<14) != 0, IBRS) + fs.setIf((b&(1<<13)) != 0, INT_WBINVD) + fs.setIf(b&(1<<12) != 0, IBPB) fs.setIf((b&(1<<9)) != 0, WBNOINVD) fs.setIf((b&(1<<8)) != 0, MCOMMIT) - fs.setIf((b&(1<<13)) != 0, INT_WBINVD) fs.setIf((b&(1<<4)) != 0, RDPRU) fs.setIf((b&(1<<3)) != 0, INVLPGB) fs.setIf((b&(1<<1)) != 0, MSRIRC) @@ -1235,6 +1340,13 @@ func support() flagSet { fs.setIf((edx>>12)&1 == 1, SVMPFT) } + if maxExtendedFunction() >= 0x8000001a { + eax, _, _, _ := cpuid(0x8000001a) + fs.setIf((eax>>0)&1 == 1, FP128) + fs.setIf((eax>>1)&1 == 1, MOVU) + fs.setIf((eax>>2)&1 == 1, FP256) + } + if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { eax, _, _, _ := cpuid(0x8000001b) fs.setIf((eax>>0)&1 == 1, IBSFFV) @@ -1245,6 +1357,10 @@ func support() flagSet { fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) + fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE) + fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX) + fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1. + fs.setIf((eax>>11)&1 == 1, IBS_ZEN4) } if maxExtendedFunction() >= 0x8000001f && vend == AMD { |