diff options
Diffstat (limited to 'vendor/github.com/klauspost/cpuid')
22 files changed, 2315 insertions, 1830 deletions
diff --git a/vendor/github.com/klauspost/cpuid/.travis.yml b/vendor/github.com/klauspost/cpuid/.travis.yml deleted file mode 100644 index 77d975fe2..000000000 --- a/vendor/github.com/klauspost/cpuid/.travis.yml +++ /dev/null @@ -1,46 +0,0 @@ -language: go - -os: - - linux - - osx - - windows - -arch: - - amd64 - - arm64 - -go: - - 1.12.x - - 1.13.x - - 1.14.x - - master - -script: - - go vet ./... - - go test -race ./... - - go test -tags=noasm ./... - -stages: - - gofmt - - test - -matrix: - allow_failures: - - go: 'master' - fast_finish: true - include: - - stage: gofmt - go: 1.14.x - os: linux - arch: amd64 - script: - - diff <(gofmt -d .) <(printf "") - - diff <(gofmt -d ./private) <(printf "") - - go install github.com/klauspost/asmfmt/cmd/asmfmt - - diff <(asmfmt -d .) <(printf "") - - stage: i386 - go: 1.14.x - os: linux - arch: amd64 - script: - - GOOS=linux GOARCH=386 go test . diff --git a/vendor/github.com/klauspost/cpuid/README.md b/vendor/github.com/klauspost/cpuid/README.md deleted file mode 100644 index 38d4a8b93..000000000 --- a/vendor/github.com/klauspost/cpuid/README.md +++ /dev/null @@ -1,191 +0,0 @@ -# cpuid -Package cpuid provides information about the CPU running the current program. - -CPU features are detected on startup, and kept for fast access through the life of the application. -Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use. - -You can access the CPU information by accessing the shared CPU variable of the cpuid library. - -Package home: https://github.com/klauspost/cpuid - -[![GoDoc][1]][2] [![Build Status][3]][4] - -[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg -[2]: https://godoc.org/github.com/klauspost/cpuid -[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master -[4]: https://travis-ci.org/klauspost/cpuid - -# features - -## x86 CPU Instructions -* **CMOV** (i686 CMOV) -* **NX** (NX (No-Execute) bit) -* **AMD3DNOW** (AMD 3DNOW) -* **AMD3DNOWEXT** (AMD 3DNowExt) -* **MMX** (standard MMX) -* **MMXEXT** (SSE integer functions or AMD MMX ext) -* **SSE** (SSE functions) -* **SSE2** (P4 SSE functions) -* **SSE3** (Prescott SSE3 functions) -* **SSSE3** (Conroe SSSE3 functions) -* **SSE4** (Penryn SSE4.1 functions) -* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions) -* **SSE42** (Nehalem SSE4.2 functions) -* **AVX** (AVX functions) -* **AVX2** (AVX2 functions) -* **FMA3** (Intel FMA 3) -* **FMA4** (Bulldozer FMA4 functions) -* **XOP** (Bulldozer XOP functions) -* **F16C** (Half-precision floating-point conversion) -* **BMI1** (Bit Manipulation Instruction Set 1) -* **BMI2** (Bit Manipulation Instruction Set 2) -* **TBM** (AMD Trailing Bit Manipulation) -* **LZCNT** (LZCNT instruction) -* **POPCNT** (POPCNT instruction) -* **AESNI** (Advanced Encryption Standard New Instructions) -* **CLMUL** (Carry-less Multiplication) -* **HTT** (Hyperthreading (enabled)) -* **HLE** (Hardware Lock Elision) -* **RTM** (Restricted Transactional Memory) -* **RDRAND** (RDRAND instruction is available) -* **RDSEED** (RDSEED instruction is available) -* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions)) -* **SHA** (Intel SHA Extensions) -* **AVX512F** (AVX-512 Foundation) -* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions) -* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions) -* **AVX512PF** (AVX-512 Prefetch Instructions) -* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions) -* **AVX512CD** (AVX-512 Conflict Detection Instructions) -* **AVX512BW** (AVX-512 Byte and Word Instructions) -* **AVX512VL** (AVX-512 Vector Length Extensions) -* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions) -* **AVX512VBMI2** (AVX-512 Vector Bit Manipulation Instructions, Version 2) -* **AVX512VNNI** (AVX-512 Vector Neural Network Instructions) -* **AVX512VPOPCNTDQ** (AVX-512 Vector Population Count Doubleword and Quadword) -* **GFNI** (Galois Field New Instructions) -* **VAES** (Vector AES) -* **AVX512BITALG** (AVX-512 Bit Algorithms) -* **VPCLMULQDQ** (Carry-Less Multiplication Quadword) -* **AVX512BF16** (AVX-512 BFLOAT16 Instructions) -* **AVX512VP2INTERSECT** (AVX-512 Intersect for D/Q) -* **MPX** (Intel MPX (Memory Protection Extensions)) -* **ERMS** (Enhanced REP MOVSB/STOSB) -* **RDTSCP** (RDTSCP Instruction) -* **CX16** (CMPXCHG16B Instruction) -* **SGX** (Software Guard Extensions, with activation details) -* **VMX** (Virtual Machine Extensions) - -## Performance -* **RDTSCP()** Returns current cycle count. Can be used for benchmarking. -* **SSE2SLOW** (SSE2 is supported, but usually not faster) -* **SSE3SLOW** (SSE3 is supported, but usually not faster) -* **ATOM** (Atom processor, some SSSE3 instructions are slower) -* **Cache line** (Probable size of a cache line). -* **L1, L2, L3 Cache size** on newer Intel/AMD CPUs. - -## ARM CPU features - -# ARM FEATURE DETECTION DISABLED! - -See [#52](https://github.com/klauspost/cpuid/issues/52). - -Currently only `arm64` platforms are implemented. - -* **FP** Single-precision and double-precision floating point -* **ASIMD** Advanced SIMD -* **EVTSTRM** Generic timer -* **AES** AES instructions -* **PMULL** Polynomial Multiply instructions (PMULL/PMULL2) -* **SHA1** SHA-1 instructions (SHA1C, etc) -* **SHA2** SHA-2 instructions (SHA256H, etc) -* **CRC32** CRC32/CRC32C instructions -* **ATOMICS** Large System Extensions (LSE) -* **FPHP** Half-precision floating point -* **ASIMDHP** Advanced SIMD half-precision floating point -* **ARMCPUID** Some CPU ID registers readable at user-level -* **ASIMDRDM** Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) -* **JSCVT** Javascript-style double->int convert (FJCVTZS) -* **FCMA** Floating point complex number addition and multiplication -* **LRCPC** Weaker release consistency (LDAPR, etc) -* **DCPOP** Data cache clean to Point of Persistence (DC CVAP) -* **SHA3** SHA-3 instructions (EOR3, RAXI, XAR, BCAX) -* **SM3** SM3 instructions -* **SM4** SM4 instructions -* **ASIMDDP** SIMD Dot Product -* **SHA512** SHA512 instructions -* **SVE** Scalable Vector Extension -* **GPA** Generic Pointer Authentication - -## Cpu Vendor/VM -* **Intel** -* **AMD** -* **VIA** -* **Transmeta** -* **NSC** -* **KVM** (Kernel-based Virtual Machine) -* **MSVM** (Microsoft Hyper-V or Windows Virtual PC) -* **VMware** -* **XenHVM** -* **Bhyve** -* **Hygon** - -# installing - -```go get github.com/klauspost/cpuid``` - -# example - -```Go -package main - -import ( - "fmt" - "github.com/klauspost/cpuid" -) - -func main() { - // Print basic CPU information: - fmt.Println("Name:", cpuid.CPU.BrandName) - fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores) - fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore) - fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores) - fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model) - fmt.Println("Features:", cpuid.CPU.Features) - fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine) - fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes") - fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes") - fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes") - fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes") - - // Test if we have a specific feature: - if cpuid.CPU.SSE() { - fmt.Println("We have Streaming SIMD Extensions") - } -} -``` - -Sample output: -``` ->go run main.go -Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz -PhysicalCores: 2 -ThreadsPerCore: 2 -LogicalCores: 4 -Family 6 Model: 42 -Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL -Cacheline bytes: 64 -We have Streaming SIMD Extensions -``` - -# private package - -In the "private" folder you can find an autogenerated version of the library you can include in your own packages. - -For this purpose all exports are removed, and functions and constants are lowercased. - -This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages. - -# license - -This code is published under an MIT license. See LICENSE file for more information. diff --git a/vendor/github.com/klauspost/cpuid/cpuid.go b/vendor/github.com/klauspost/cpuid/cpuid.go deleted file mode 100644 index 208b3e79b..000000000 --- a/vendor/github.com/klauspost/cpuid/cpuid.go +++ /dev/null @@ -1,1504 +0,0 @@ -// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. - -// Package cpuid provides information about the CPU running the current program. -// -// CPU features are detected on startup, and kept for fast access through the life of the application. -// Currently x86 / x64 (AMD64) as well as arm64 is supported. -// -// You can access the CPU information by accessing the shared CPU variable of the cpuid library. -// -// Package home: https://github.com/klauspost/cpuid -package cpuid - -import ( - "math" - "strings" -) - -// AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf -// and Processor Programming Reference (PPR) - -// Vendor is a representation of a CPU vendor. -type Vendor int - -const ( - Other Vendor = iota - Intel - AMD - VIA - Transmeta - NSC - KVM // Kernel-based Virtual Machine - MSVM // Microsoft Hyper-V or Windows Virtual PC - VMware - XenHVM - Bhyve - Hygon - SiS - RDC -) - -const ( - CMOV = 1 << iota // i686 CMOV - NX // NX (No-Execute) bit - AMD3DNOW // AMD 3DNOW - AMD3DNOWEXT // AMD 3DNowExt - MMX // standard MMX - MMXEXT // SSE integer functions or AMD MMX ext - SSE // SSE functions - SSE2 // P4 SSE functions - SSE3 // Prescott SSE3 functions - SSSE3 // Conroe SSSE3 functions - SSE4 // Penryn SSE4.1 functions - SSE4A // AMD Barcelona microarchitecture SSE4a instructions - SSE42 // Nehalem SSE4.2 functions - AVX // AVX functions - AVX2 // AVX2 functions - FMA3 // Intel FMA 3 - FMA4 // Bulldozer FMA4 functions - XOP // Bulldozer XOP functions - F16C // Half-precision floating-point conversion - BMI1 // Bit Manipulation Instruction Set 1 - BMI2 // Bit Manipulation Instruction Set 2 - TBM // AMD Trailing Bit Manipulation - LZCNT // LZCNT instruction - POPCNT // POPCNT instruction - AESNI // Advanced Encryption Standard New Instructions - CLMUL // Carry-less Multiplication - HTT // Hyperthreading (enabled) - HLE // Hardware Lock Elision - RTM // Restricted Transactional Memory - RDRAND // RDRAND instruction is available - RDSEED // RDSEED instruction is available - ADX // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) - SHA // Intel SHA Extensions - AVX512F // AVX-512 Foundation - AVX512DQ // AVX-512 Doubleword and Quadword Instructions - AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions - AVX512PF // AVX-512 Prefetch Instructions - AVX512ER // AVX-512 Exponential and Reciprocal Instructions - AVX512CD // AVX-512 Conflict Detection Instructions - AVX512BW // AVX-512 Byte and Word Instructions - AVX512VL // AVX-512 Vector Length Extensions - AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions - AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 - AVX512VNNI // AVX-512 Vector Neural Network Instructions - AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword - GFNI // Galois Field New Instructions - VAES // Vector AES - AVX512BITALG // AVX-512 Bit Algorithms - VPCLMULQDQ // Carry-Less Multiplication Quadword - AVX512BF16 // AVX-512 BFLOAT16 Instructions - AVX512VP2INTERSECT // AVX-512 Intersect for D/Q - MPX // Intel MPX (Memory Protection Extensions) - ERMS // Enhanced REP MOVSB/STOSB - RDTSCP // RDTSCP Instruction - CX16 // CMPXCHG16B Instruction - SGX // Software Guard Extensions - SGXLC // Software Guard Extensions Launch Control - IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) - STIBP // Single Thread Indirect Branch Predictors - VMX // Virtual Machine Extensions - - // Performance indicators - SSE2SLOW // SSE2 is supported, but usually not faster - SSE3SLOW // SSE3 is supported, but usually not faster - ATOM // Atom processor, some SSSE3 instructions are slower -) - -var flagNames = map[Flags]string{ - CMOV: "CMOV", // i686 CMOV - NX: "NX", // NX (No-Execute) bit - AMD3DNOW: "AMD3DNOW", // AMD 3DNOW - AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt - MMX: "MMX", // Standard MMX - MMXEXT: "MMXEXT", // SSE integer functions or AMD MMX ext - SSE: "SSE", // SSE functions - SSE2: "SSE2", // P4 SSE2 functions - SSE3: "SSE3", // Prescott SSE3 functions - SSSE3: "SSSE3", // Conroe SSSE3 functions - SSE4: "SSE4.1", // Penryn SSE4.1 functions - SSE4A: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions - SSE42: "SSE4.2", // Nehalem SSE4.2 functions - AVX: "AVX", // AVX functions - AVX2: "AVX2", // AVX functions - FMA3: "FMA3", // Intel FMA 3 - FMA4: "FMA4", // Bulldozer FMA4 functions - XOP: "XOP", // Bulldozer XOP functions - F16C: "F16C", // Half-precision floating-point conversion - BMI1: "BMI1", // Bit Manipulation Instruction Set 1 - BMI2: "BMI2", // Bit Manipulation Instruction Set 2 - TBM: "TBM", // AMD Trailing Bit Manipulation - LZCNT: "LZCNT", // LZCNT instruction - POPCNT: "POPCNT", // POPCNT instruction - AESNI: "AESNI", // Advanced Encryption Standard New Instructions - CLMUL: "CLMUL", // Carry-less Multiplication - HTT: "HTT", // Hyperthreading (enabled) - HLE: "HLE", // Hardware Lock Elision - RTM: "RTM", // Restricted Transactional Memory - RDRAND: "RDRAND", // RDRAND instruction is available - RDSEED: "RDSEED", // RDSEED instruction is available - ADX: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) - SHA: "SHA", // Intel SHA Extensions - AVX512F: "AVX512F", // AVX-512 Foundation - AVX512DQ: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions - AVX512IFMA: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions - AVX512PF: "AVX512PF", // AVX-512 Prefetch Instructions - AVX512ER: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions - AVX512CD: "AVX512CD", // AVX-512 Conflict Detection Instructions - AVX512BW: "AVX512BW", // AVX-512 Byte and Word Instructions - AVX512VL: "AVX512VL", // AVX-512 Vector Length Extensions - AVX512VBMI: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions - AVX512VBMI2: "AVX512VBMI2", // AVX-512 Vector Bit Manipulation Instructions, Version 2 - AVX512VNNI: "AVX512VNNI", // AVX-512 Vector Neural Network Instructions - AVX512VPOPCNTDQ: "AVX512VPOPCNTDQ", // AVX-512 Vector Population Count Doubleword and Quadword - GFNI: "GFNI", // Galois Field New Instructions - VAES: "VAES", // Vector AES - AVX512BITALG: "AVX512BITALG", // AVX-512 Bit Algorithms - VPCLMULQDQ: "VPCLMULQDQ", // Carry-Less Multiplication Quadword - AVX512BF16: "AVX512BF16", // AVX-512 BFLOAT16 Instruction - AVX512VP2INTERSECT: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q - MPX: "MPX", // Intel MPX (Memory Protection Extensions) - ERMS: "ERMS", // Enhanced REP MOVSB/STOSB - RDTSCP: "RDTSCP", // RDTSCP Instruction - CX16: "CX16", // CMPXCHG16B Instruction - SGX: "SGX", // Software Guard Extensions - SGXLC: "SGXLC", // Software Guard Extensions Launch Control - IBPB: "IBPB", // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier - STIBP: "STIBP", // Single Thread Indirect Branch Predictors - VMX: "VMX", // Virtual Machine Extensions - - // Performance indicators - SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster - SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster - ATOM: "ATOM", // Atom processor, some SSSE3 instructions are slower - -} - -/* all special features for arm64 should be defined here */ -const ( - /* extension instructions */ - FP ArmFlags = 1 << iota - ASIMD - EVTSTRM - AES - PMULL - SHA1 - SHA2 - CRC32 - ATOMICS - FPHP - ASIMDHP - ARMCPUID - ASIMDRDM - JSCVT - FCMA - LRCPC - DCPOP - SHA3 - SM3 - SM4 - ASIMDDP - SHA512 - SVE - GPA -) - -var flagNamesArm = map[ArmFlags]string{ - FP: "FP", // Single-precision and double-precision floating point - ASIMD: "ASIMD", // Advanced SIMD - EVTSTRM: "EVTSTRM", // Generic timer - AES: "AES", // AES instructions - PMULL: "PMULL", // Polynomial Multiply instructions (PMULL/PMULL2) - SHA1: "SHA1", // SHA-1 instructions (SHA1C, etc) - SHA2: "SHA2", // SHA-2 instructions (SHA256H, etc) - CRC32: "CRC32", // CRC32/CRC32C instructions - ATOMICS: "ATOMICS", // Large System Extensions (LSE) - FPHP: "FPHP", // Half-precision floating point - ASIMDHP: "ASIMDHP", // Advanced SIMD half-precision floating point - ARMCPUID: "CPUID", // Some CPU ID registers readable at user-level - ASIMDRDM: "ASIMDRDM", // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) - JSCVT: "JSCVT", // Javascript-style double->int convert (FJCVTZS) - FCMA: "FCMA", // Floatin point complex number addition and multiplication - LRCPC: "LRCPC", // Weaker release consistency (LDAPR, etc) - DCPOP: "DCPOP", // Data cache clean to Point of Persistence (DC CVAP) - SHA3: "SHA3", // SHA-3 instructions (EOR3, RAXI, XAR, BCAX) - SM3: "SM3", // SM3 instructions - SM4: "SM4", // SM4 instructions - ASIMDDP: "ASIMDDP", // SIMD Dot Product - SHA512: "SHA512", // SHA512 instructions - SVE: "SVE", // Scalable Vector Extension - GPA: "GPA", // Generic Pointer Authentication -} - -// CPUInfo contains information about the detected system CPU. -type CPUInfo struct { - BrandName string // Brand name reported by the CPU - VendorID Vendor // Comparable CPU vendor ID - VendorString string // Raw vendor string. - Features Flags // Features of the CPU (x64) - Arm ArmFlags // Features of the CPU (arm) - PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. - ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. - LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. - Family int // CPU family number - Model int // CPU model number - CacheLine int // Cache line size in bytes. Will be 0 if undetectable. - Hz int64 // Clock speed, if known - Cache struct { - L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected - L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected - L2 int // L2 Cache (per core or shared). Will be -1 if undetected - L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected - } - SGX SGXSupport - maxFunc uint32 - maxExFunc uint32 -} - -var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) -var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) -var xgetbv func(index uint32) (eax, edx uint32) -var rdtscpAsm func() (eax, ebx, ecx, edx uint32) - -// CPU contains information about the CPU as detected on startup, -// or when Detect last was called. -// -// Use this as the primary entry point to you data. -var CPU CPUInfo - -func init() { - initCPU() - Detect() -} - -// Detect will re-detect current CPU info. -// This will replace the content of the exported CPU variable. -// -// Unless you expect the CPU to change while you are running your program -// you should not need to call this function. -// If you call this, you must ensure that no other goroutine is accessing the -// exported CPU variable. -func Detect() { - // Set defaults - CPU.ThreadsPerCore = 1 - CPU.Cache.L1I = -1 - CPU.Cache.L1D = -1 - CPU.Cache.L2 = -1 - CPU.Cache.L3 = -1 - addInfo(&CPU) -} - -// Generated here: http://play.golang.org/p/BxFH2Gdc0G - -// Cmov indicates support of CMOV instructions -func (c CPUInfo) Cmov() bool { - return c.Features&CMOV != 0 -} - -// Amd3dnow indicates support of AMD 3DNOW! instructions -func (c CPUInfo) Amd3dnow() bool { - return c.Features&AMD3DNOW != 0 -} - -// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions -func (c CPUInfo) Amd3dnowExt() bool { - return c.Features&AMD3DNOWEXT != 0 -} - -// VMX indicates support of VMX -func (c CPUInfo) VMX() bool { - return c.Features&VMX != 0 -} - -// MMX indicates support of MMX instructions -func (c CPUInfo) MMX() bool { - return c.Features&MMX != 0 -} - -// MMXExt indicates support of MMXEXT instructions -// (SSE integer functions or AMD MMX ext) -func (c CPUInfo) MMXExt() bool { - return c.Features&MMXEXT != 0 -} - -// SSE indicates support of SSE instructions -func (c CPUInfo) SSE() bool { - return c.Features&SSE != 0 -} - -// SSE2 indicates support of SSE 2 instructions -func (c CPUInfo) SSE2() bool { - return c.Features&SSE2 != 0 -} - -// SSE3 indicates support of SSE 3 instructions -func (c CPUInfo) SSE3() bool { - return c.Features&SSE3 != 0 -} - -// SSSE3 indicates support of SSSE 3 instructions -func (c CPUInfo) SSSE3() bool { - return c.Features&SSSE3 != 0 -} - -// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions -func (c CPUInfo) SSE4() bool { - return c.Features&SSE4 != 0 -} - -// SSE42 indicates support of SSE4.2 instructions -func (c CPUInfo) SSE42() bool { - return c.Features&SSE42 != 0 -} - -// AVX indicates support of AVX instructions -// and operating system support of AVX instructions -func (c CPUInfo) AVX() bool { - return c.Features&AVX != 0 -} - -// AVX2 indicates support of AVX2 instructions -func (c CPUInfo) AVX2() bool { - return c.Features&AVX2 != 0 -} - -// FMA3 indicates support of FMA3 instructions -func (c CPUInfo) FMA3() bool { - return c.Features&FMA3 != 0 -} - -// FMA4 indicates support of FMA4 instructions -func (c CPUInfo) FMA4() bool { - return c.Features&FMA4 != 0 -} - -// XOP indicates support of XOP instructions -func (c CPUInfo) XOP() bool { - return c.Features&XOP != 0 -} - -// F16C indicates support of F16C instructions -func (c CPUInfo) F16C() bool { - return c.Features&F16C != 0 -} - -// BMI1 indicates support of BMI1 instructions -func (c CPUInfo) BMI1() bool { - return c.Features&BMI1 != 0 -} - -// BMI2 indicates support of BMI2 instructions -func (c CPUInfo) BMI2() bool { - return c.Features&BMI2 != 0 -} - -// TBM indicates support of TBM instructions -// (AMD Trailing Bit Manipulation) -func (c CPUInfo) TBM() bool { - return c.Features&TBM != 0 -} - -// Lzcnt indicates support of LZCNT instruction -func (c CPUInfo) Lzcnt() bool { - return c.Features&LZCNT != 0 -} - -// Popcnt indicates support of POPCNT instruction -func (c CPUInfo) Popcnt() bool { - return c.Features&POPCNT != 0 -} - -// HTT indicates the processor has Hyperthreading enabled -func (c CPUInfo) HTT() bool { - return c.Features&HTT != 0 -} - -// SSE2Slow indicates that SSE2 may be slow on this processor -func (c CPUInfo) SSE2Slow() bool { - return c.Features&SSE2SLOW != 0 -} - -// SSE3Slow indicates that SSE3 may be slow on this processor -func (c CPUInfo) SSE3Slow() bool { - return c.Features&SSE3SLOW != 0 -} - -// AesNi indicates support of AES-NI instructions -// (Advanced Encryption Standard New Instructions) -func (c CPUInfo) AesNi() bool { - return c.Features&AESNI != 0 -} - -// Clmul indicates support of CLMUL instructions -// (Carry-less Multiplication) -func (c CPUInfo) Clmul() bool { - return c.Features&CLMUL != 0 -} - -// NX indicates support of NX (No-Execute) bit -func (c CPUInfo) NX() bool { - return c.Features&NX != 0 -} - -// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions -func (c CPUInfo) SSE4A() bool { - return c.Features&SSE4A != 0 -} - -// HLE indicates support of Hardware Lock Elision -func (c CPUInfo) HLE() bool { - return c.Features&HLE != 0 -} - -// RTM indicates support of Restricted Transactional Memory -func (c CPUInfo) RTM() bool { - return c.Features&RTM != 0 -} - -// Rdrand indicates support of RDRAND instruction is available -func (c CPUInfo) Rdrand() bool { - return c.Features&RDRAND != 0 -} - -// Rdseed indicates support of RDSEED instruction is available -func (c CPUInfo) Rdseed() bool { - return c.Features&RDSEED != 0 -} - -// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions) -func (c CPUInfo) ADX() bool { - return c.Features&ADX != 0 -} - -// SHA indicates support of Intel SHA Extensions -func (c CPUInfo) SHA() bool { - return c.Features&SHA != 0 -} - -// AVX512F indicates support of AVX-512 Foundation -func (c CPUInfo) AVX512F() bool { - return c.Features&AVX512F != 0 -} - -// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions -func (c CPUInfo) AVX512DQ() bool { - return c.Features&AVX512DQ != 0 -} - -// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions -func (c CPUInfo) AVX512IFMA() bool { - return c.Features&AVX512IFMA != 0 -} - -// AVX512PF indicates support of AVX-512 Prefetch Instructions -func (c CPUInfo) AVX512PF() bool { - return c.Features&AVX512PF != 0 -} - -// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions -func (c CPUInfo) AVX512ER() bool { - return c.Features&AVX512ER != 0 -} - -// AVX512CD indicates support of AVX-512 Conflict Detection Instructions -func (c CPUInfo) AVX512CD() bool { - return c.Features&AVX512CD != 0 -} - -// AVX512BW indicates support of AVX-512 Byte and Word Instructions -func (c CPUInfo) AVX512BW() bool { - return c.Features&AVX512BW != 0 -} - -// AVX512VL indicates support of AVX-512 Vector Length Extensions -func (c CPUInfo) AVX512VL() bool { - return c.Features&AVX512VL != 0 -} - -// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions -func (c CPUInfo) AVX512VBMI() bool { - return c.Features&AVX512VBMI != 0 -} - -// AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2 -func (c CPUInfo) AVX512VBMI2() bool { - return c.Features&AVX512VBMI2 != 0 -} - -// AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions -func (c CPUInfo) AVX512VNNI() bool { - return c.Features&AVX512VNNI != 0 -} - -// AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword -func (c CPUInfo) AVX512VPOPCNTDQ() bool { - return c.Features&AVX512VPOPCNTDQ != 0 -} - -// GFNI indicates support of Galois Field New Instructions -func (c CPUInfo) GFNI() bool { - return c.Features&GFNI != 0 -} - -// VAES indicates support of Vector AES -func (c CPUInfo) VAES() bool { - return c.Features&VAES != 0 -} - -// AVX512BITALG indicates support of AVX-512 Bit Algorithms -func (c CPUInfo) AVX512BITALG() bool { - return c.Features&AVX512BITALG != 0 -} - -// VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword -func (c CPUInfo) VPCLMULQDQ() bool { - return c.Features&VPCLMULQDQ != 0 -} - -// AVX512BF16 indicates support of -func (c CPUInfo) AVX512BF16() bool { - return c.Features&AVX512BF16 != 0 -} - -// AVX512VP2INTERSECT indicates support of -func (c CPUInfo) AVX512VP2INTERSECT() bool { - return c.Features&AVX512VP2INTERSECT != 0 -} - -// MPX indicates support of Intel MPX (Memory Protection Extensions) -func (c CPUInfo) MPX() bool { - return c.Features&MPX != 0 -} - -// ERMS indicates support of Enhanced REP MOVSB/STOSB -func (c CPUInfo) ERMS() bool { - return c.Features&ERMS != 0 -} - -// RDTSCP Instruction is available. -func (c CPUInfo) RDTSCP() bool { - return c.Features&RDTSCP != 0 -} - -// CX16 indicates if CMPXCHG16B instruction is available. -func (c CPUInfo) CX16() bool { - return c.Features&CX16 != 0 -} - -// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection. -// So TSX simply checks that. -func (c CPUInfo) TSX() bool { - return c.Features&(HLE|RTM) == HLE|RTM -} - -// Atom indicates an Atom processor -func (c CPUInfo) Atom() bool { - return c.Features&ATOM != 0 -} - -// Intel returns true if vendor is recognized as Intel -func (c CPUInfo) Intel() bool { - return c.VendorID == Intel -} - -// AMD returns true if vendor is recognized as AMD -func (c CPUInfo) AMD() bool { - return c.VendorID == AMD -} - -// Hygon returns true if vendor is recognized as Hygon -func (c CPUInfo) Hygon() bool { - return c.VendorID == Hygon -} - -// Transmeta returns true if vendor is recognized as Transmeta -func (c CPUInfo) Transmeta() bool { - return c.VendorID == Transmeta -} - -// NSC returns true if vendor is recognized as National Semiconductor -func (c CPUInfo) NSC() bool { - return c.VendorID == NSC -} - -// VIA returns true if vendor is recognized as VIA -func (c CPUInfo) VIA() bool { - return c.VendorID == VIA -} - -// RTCounter returns the 64-bit time-stamp counter -// Uses the RDTSCP instruction. The value 0 is returned -// if the CPU does not support the instruction. -func (c CPUInfo) RTCounter() uint64 { - if !c.RDTSCP() { - return 0 - } - a, _, _, d := rdtscpAsm() - return uint64(a) | (uint64(d) << 32) -} - -// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. -// This variable is OS dependent, but on Linux contains information -// about the current cpu/core the code is running on. -// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. -func (c CPUInfo) Ia32TscAux() uint32 { - if !c.RDTSCP() { - return 0 - } - _, _, ecx, _ := rdtscpAsm() - return ecx -} - -// LogicalCPU will return the Logical CPU the code is currently executing on. -// This is likely to change when the OS re-schedules the running thread -// to another CPU. -// If the current core cannot be detected, -1 will be returned. -func (c CPUInfo) LogicalCPU() int { - if c.maxFunc < 1 { - return -1 - } - _, ebx, _, _ := cpuid(1) - return int(ebx >> 24) -} - -// hertz tries to compute the clock speed of the CPU. If leaf 15 is -// supported, use it, otherwise parse the brand string. Yes, really. -func hertz(model string) int64 { - mfi := maxFunctionID() - if mfi >= 0x15 { - eax, ebx, ecx, _ := cpuid(0x15) - if eax != 0 && ebx != 0 && ecx != 0 { - return int64((int64(ecx) * int64(ebx)) / int64(eax)) - } - } - // computeHz determines the official rated speed of a CPU from its brand - // string. This insanity is *actually the official documented way to do - // this according to Intel*, prior to leaf 0x15 existing. The official - // documentation only shows this working for exactly `x.xx` or `xxxx` - // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other - // sizes. - hz := strings.LastIndex(model, "Hz") - if hz < 3 { - return -1 - } - var multiplier int64 - switch model[hz-1] { - case 'M': - multiplier = 1000 * 1000 - case 'G': - multiplier = 1000 * 1000 * 1000 - case 'T': - multiplier = 1000 * 1000 * 1000 * 1000 - } - if multiplier == 0 { - return -1 - } - freq := int64(0) - divisor := int64(0) - decimalShift := int64(1) - var i int - for i = hz - 2; i >= 0 && model[i] != ' '; i-- { - if model[i] >= '0' && model[i] <= '9' { - freq += int64(model[i]-'0') * decimalShift - decimalShift *= 10 - } else if model[i] == '.' { - if divisor != 0 { - return -1 - } - divisor = decimalShift - } else { - return -1 - } - } - // we didn't find a space - if i < 0 { - return -1 - } - if divisor != 0 { - return (freq * multiplier) / divisor - } - return freq * multiplier -} - -// VM Will return true if the cpu id indicates we are in -// a virtual machine. This is only a hint, and will very likely -// have many false negatives. -func (c CPUInfo) VM() bool { - switch c.VendorID { - case MSVM, KVM, VMware, XenHVM, Bhyve: - return true - } - return false -} - -// Flags contains detected cpu features and characteristics -type Flags uint64 - -// ArmFlags contains detected ARM cpu features and characteristics -type ArmFlags uint64 - -// String returns a string representation of the detected -// CPU features. -func (f Flags) String() string { - return strings.Join(f.Strings(), ",") -} - -// Strings returns an array of the detected features. -func (f Flags) Strings() []string { - r := make([]string, 0, 20) - for i := uint(0); i < 64; i++ { - key := Flags(1 << i) - val := flagNames[key] - if f&key != 0 { - r = append(r, val) - } - } - return r -} - -// String returns a string representation of the detected -// CPU features. -func (f ArmFlags) String() string { - return strings.Join(f.Strings(), ",") -} - -// Strings returns an array of the detected features. -func (f ArmFlags) Strings() []string { - r := make([]string, 0, 20) - for i := uint(0); i < 64; i++ { - key := ArmFlags(1 << i) - val := flagNamesArm[key] - if f&key != 0 { - r = append(r, val) - } - } - return r -} -func maxExtendedFunction() uint32 { - eax, _, _, _ := cpuid(0x80000000) - return eax -} - -func maxFunctionID() uint32 { - a, _, _, _ := cpuid(0) - return a -} - -func brandName() string { - if maxExtendedFunction() >= 0x80000004 { - v := make([]uint32, 0, 48) - for i := uint32(0); i < 3; i++ { - a, b, c, d := cpuid(0x80000002 + i) - v = append(v, a, b, c, d) - } - return strings.Trim(string(valAsString(v...)), " ") - } - return "unknown" -} - -func threadsPerCore() int { - mfi := maxFunctionID() - vend, _ := vendorID() - - if mfi < 0x4 || (vend != Intel && vend != AMD) { - return 1 - } - - if mfi < 0xb { - if vend != Intel { - return 1 - } - _, b, _, d := cpuid(1) - if (d & (1 << 28)) != 0 { - // v will contain logical core count - v := (b >> 16) & 255 - if v > 1 { - a4, _, _, _ := cpuid(4) - // physical cores - v2 := (a4 >> 26) + 1 - if v2 > 0 { - return int(v) / int(v2) - } - } - } - return 1 - } - _, b, _, _ := cpuidex(0xb, 0) - if b&0xffff == 0 { - return 1 - } - return int(b & 0xffff) -} - -func logicalCores() int { - mfi := maxFunctionID() - v, _ := vendorID() - switch v { - case Intel: - // Use this on old Intel processors - if mfi < 0xb { - if mfi < 1 { - return 0 - } - // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) - // that can be assigned to logical processors in a physical package. - // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. - _, ebx, _, _ := cpuid(1) - logical := (ebx >> 16) & 0xff - return int(logical) - } - _, b, _, _ := cpuidex(0xb, 1) - return int(b & 0xffff) - case AMD, Hygon: - _, b, _, _ := cpuid(1) - return int((b >> 16) & 0xff) - default: - return 0 - } -} - -func familyModel() (int, int) { - if maxFunctionID() < 0x1 { - return 0, 0 - } - eax, _, _, _ := cpuid(1) - family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) - model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) - return int(family), int(model) -} - -func physicalCores() int { - v, _ := vendorID() - switch v { - case Intel: - return logicalCores() / threadsPerCore() - case AMD, Hygon: - lc := logicalCores() - tpc := threadsPerCore() - if lc > 0 && tpc > 0 { - return lc / tpc - } - // The following is inaccurate on AMD EPYC 7742 64-Core Processor - - if maxExtendedFunction() >= 0x80000008 { - _, _, c, _ := cpuid(0x80000008) - return int(c&0xff) + 1 - } - } - return 0 -} - -// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID -var vendorMapping = map[string]Vendor{ - "AMDisbetter!": AMD, - "AuthenticAMD": AMD, - "CentaurHauls": VIA, - "GenuineIntel": Intel, - "TransmetaCPU": Transmeta, - "GenuineTMx86": Transmeta, - "Geode by NSC": NSC, - "VIA VIA VIA ": VIA, - "KVMKVMKVMKVM": KVM, - "Microsoft Hv": MSVM, - "VMwareVMware": VMware, - "XenVMMXenVMM": XenHVM, - "bhyve bhyve ": Bhyve, - "HygonGenuine": Hygon, - "Vortex86 SoC": SiS, - "SiS SiS SiS ": SiS, - "RiseRiseRise": SiS, - "Genuine RDC": RDC, -} - -func vendorID() (Vendor, string) { - _, b, c, d := cpuid(0) - v := string(valAsString(b, d, c)) - vend, ok := vendorMapping[v] - if !ok { - return Other, v - } - return vend, v -} - -func cacheLine() int { - if maxFunctionID() < 0x1 { - return 0 - } - - _, ebx, _, _ := cpuid(1) - cache := (ebx & 0xff00) >> 5 // cflush size - if cache == 0 && maxExtendedFunction() >= 0x80000006 { - _, _, ecx, _ := cpuid(0x80000006) - cache = ecx & 0xff // cacheline size - } - // TODO: Read from Cache and TLB Information - return int(cache) -} - -func (c *CPUInfo) cacheSize() { - c.Cache.L1D = -1 - c.Cache.L1I = -1 - c.Cache.L2 = -1 - c.Cache.L3 = -1 - vendor, _ := vendorID() - switch vendor { - case Intel: - if maxFunctionID() < 4 { - return - } - for i := uint32(0); ; i++ { - eax, ebx, ecx, _ := cpuidex(4, i) - cacheType := eax & 15 - if cacheType == 0 { - break - } - cacheLevel := (eax >> 5) & 7 - coherency := int(ebx&0xfff) + 1 - partitions := int((ebx>>12)&0x3ff) + 1 - associativity := int((ebx>>22)&0x3ff) + 1 - sets := int(ecx) + 1 - size := associativity * partitions * coherency * sets - switch cacheLevel { - case 1: - if cacheType == 1 { - // 1 = Data Cache - c.Cache.L1D = size - } else if cacheType == 2 { - // 2 = Instruction Cache - c.Cache.L1I = size - } else { - if c.Cache.L1D < 0 { - c.Cache.L1I = size - } - if c.Cache.L1I < 0 { - c.Cache.L1I = size - } - } - case 2: - c.Cache.L2 = size - case 3: - c.Cache.L3 = size - } - } - case AMD, Hygon: - // Untested. - if maxExtendedFunction() < 0x80000005 { - return - } - _, _, ecx, edx := cpuid(0x80000005) - c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) - c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) - - if maxExtendedFunction() < 0x80000006 { - return - } - _, _, ecx, _ = cpuid(0x80000006) - c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) - - // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties - if maxExtendedFunction() < 0x8000001D { - return - } - for i := uint32(0); i < math.MaxUint32; i++ { - eax, ebx, ecx, _ := cpuidex(0x8000001D, i) - - level := (eax >> 5) & 7 - cacheNumSets := ecx + 1 - cacheLineSize := 1 + (ebx & 2047) - cachePhysPartitions := 1 + ((ebx >> 12) & 511) - cacheNumWays := 1 + ((ebx >> 22) & 511) - - typ := eax & 15 - size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) - if typ == 0 { - return - } - - switch level { - case 1: - switch typ { - case 1: - // Data cache - c.Cache.L1D = size - case 2: - // Inst cache - c.Cache.L1I = size - default: - if c.Cache.L1D < 0 { - c.Cache.L1I = size - } - if c.Cache.L1I < 0 { - c.Cache.L1I = size - } - } - case 2: - c.Cache.L2 = size - case 3: - c.Cache.L3 = size - } - } - } - - return -} - -type SGXEPCSection struct { - BaseAddress uint64 - EPCSize uint64 -} - -type SGXSupport struct { - Available bool - LaunchControl bool - SGX1Supported bool - SGX2Supported bool - MaxEnclaveSizeNot64 int64 - MaxEnclaveSize64 int64 - EPCSections []SGXEPCSection -} - -func hasSGX(available, lc bool) (rval SGXSupport) { - rval.Available = available - - if !available { - return - } - - rval.LaunchControl = lc - - a, _, _, d := cpuidex(0x12, 0) - rval.SGX1Supported = a&0x01 != 0 - rval.SGX2Supported = a&0x02 != 0 - rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 - rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 - rval.EPCSections = make([]SGXEPCSection, 0) - - for subleaf := uint32(2); subleaf < 2+8; subleaf++ { - eax, ebx, ecx, edx := cpuidex(0x12, subleaf) - leafType := eax & 0xf - - if leafType == 0 { - // Invalid subleaf, stop iterating - break - } else if leafType == 1 { - // EPC Section subleaf - baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) - size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) - - section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} - rval.EPCSections = append(rval.EPCSections, section) - } - } - - return -} - -func support() Flags { - mfi := maxFunctionID() - vend, _ := vendorID() - if mfi < 0x1 { - return 0 - } - rval := uint64(0) - _, _, c, d := cpuid(1) - if (d & (1 << 15)) != 0 { - rval |= CMOV - } - if (d & (1 << 23)) != 0 { - rval |= MMX - } - if (d & (1 << 25)) != 0 { - rval |= MMXEXT - } - if (d & (1 << 25)) != 0 { - rval |= SSE - } - if (d & (1 << 26)) != 0 { - rval |= SSE2 - } - if (c & 1) != 0 { - rval |= SSE3 - } - if (c & (1 << 5)) != 0 { - rval |= VMX - } - if (c & 0x00000200) != 0 { - rval |= SSSE3 - } - if (c & 0x00080000) != 0 { - rval |= SSE4 - } - if (c & 0x00100000) != 0 { - rval |= SSE42 - } - if (c & (1 << 25)) != 0 { - rval |= AESNI - } - if (c & (1 << 1)) != 0 { - rval |= CLMUL - } - if c&(1<<23) != 0 { - rval |= POPCNT - } - if c&(1<<30) != 0 { - rval |= RDRAND - } - if c&(1<<29) != 0 { - rval |= F16C - } - if c&(1<<13) != 0 { - rval |= CX16 - } - if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { - if threadsPerCore() > 1 { - rval |= HTT - } - } - if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { - if threadsPerCore() > 1 { - rval |= HTT - } - } - // Check XGETBV, OXSAVE and AVX bits - if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { - // Check for OS support - eax, _ := xgetbv(0) - if (eax & 0x6) == 0x6 { - rval |= AVX - if (c & 0x00001000) != 0 { - rval |= FMA3 - } - } - } - - // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. - if mfi >= 7 { - _, ebx, ecx, edx := cpuidex(7, 0) - eax1, _, _, _ := cpuidex(7, 1) - if (rval&AVX) != 0 && (ebx&0x00000020) != 0 { - rval |= AVX2 - } - if (ebx & 0x00000008) != 0 { - rval |= BMI1 - if (ebx & 0x00000100) != 0 { - rval |= BMI2 - } - } - if ebx&(1<<2) != 0 { - rval |= SGX - } - if ebx&(1<<4) != 0 { - rval |= HLE - } - if ebx&(1<<9) != 0 { - rval |= ERMS - } - if ebx&(1<<11) != 0 { - rval |= RTM - } - if ebx&(1<<14) != 0 { - rval |= MPX - } - if ebx&(1<<18) != 0 { - rval |= RDSEED - } - if ebx&(1<<19) != 0 { - rval |= ADX - } - if ebx&(1<<29) != 0 { - rval |= SHA - } - if edx&(1<<26) != 0 { - rval |= IBPB - } - if ecx&(1<<30) != 0 { - rval |= SGXLC - } - if edx&(1<<27) != 0 { - rval |= STIBP - } - - // Only detect AVX-512 features if XGETBV is supported - if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { - // Check for OS support - eax, _ := xgetbv(0) - - // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and - // ZMM16-ZMM31 state are enabled by OS) - /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). - if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { - if ebx&(1<<16) != 0 { - rval |= AVX512F - } - if ebx&(1<<17) != 0 { - rval |= AVX512DQ - } - if ebx&(1<<21) != 0 { - rval |= AVX512IFMA - } - if ebx&(1<<26) != 0 { - rval |= AVX512PF - } - if ebx&(1<<27) != 0 { - rval |= AVX512ER - } - if ebx&(1<<28) != 0 { - rval |= AVX512CD - } - if ebx&(1<<30) != 0 { - rval |= AVX512BW - } - if ebx&(1<<31) != 0 { - rval |= AVX512VL - } - // ecx - if ecx&(1<<1) != 0 { - rval |= AVX512VBMI - } - if ecx&(1<<6) != 0 { - rval |= AVX512VBMI2 - } - if ecx&(1<<8) != 0 { - rval |= GFNI - } - if ecx&(1<<9) != 0 { - rval |= VAES - } - if ecx&(1<<10) != 0 { - rval |= VPCLMULQDQ - } - if ecx&(1<<11) != 0 { - rval |= AVX512VNNI - } - if ecx&(1<<12) != 0 { - rval |= AVX512BITALG - } - if ecx&(1<<14) != 0 { - rval |= AVX512VPOPCNTDQ - } - // edx - if edx&(1<<8) != 0 { - rval |= AVX512VP2INTERSECT - } - // cpuid eax 07h,ecx=1 - if eax1&(1<<5) != 0 { - rval |= AVX512BF16 - } - } - } - } - - if maxExtendedFunction() >= 0x80000001 { - _, _, c, d := cpuid(0x80000001) - if (c & (1 << 5)) != 0 { - rval |= LZCNT - rval |= POPCNT - } - if (d & (1 << 31)) != 0 { - rval |= AMD3DNOW - } - if (d & (1 << 30)) != 0 { - rval |= AMD3DNOWEXT - } - if (d & (1 << 23)) != 0 { - rval |= MMX - } - if (d & (1 << 22)) != 0 { - rval |= MMXEXT - } - if (c & (1 << 6)) != 0 { - rval |= SSE4A - } - if d&(1<<20) != 0 { - rval |= NX - } - if d&(1<<27) != 0 { - rval |= RDTSCP - } - - /* Allow for selectively disabling SSE2 functions on AMD processors - with SSE2 support but not SSE4a. This includes Athlon64, some - Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster - than SSE2 often enough to utilize this special-case flag. - AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case - so that SSE2 is used unless explicitly disabled by checking - AV_CPU_FLAG_SSE2SLOW. */ - if vend != Intel && - rval&SSE2 != 0 && (c&0x00000040) == 0 { - rval |= SSE2SLOW - } - - /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be - * used unless the OS has AVX support. */ - if (rval & AVX) != 0 { - if (c & 0x00000800) != 0 { - rval |= XOP - } - if (c & 0x00010000) != 0 { - rval |= FMA4 - } - } - - if vend == Intel { - family, model := familyModel() - if family == 6 && (model == 9 || model == 13 || model == 14) { - /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and - * 6/14 (core1 "yonah") theoretically support sse2, but it's - * usually slower than mmx. */ - if (rval & SSE2) != 0 { - rval |= SSE2SLOW - } - if (rval & SSE3) != 0 { - rval |= SSE3SLOW - } - } - /* The Atom processor has SSSE3 support, which is useful in many cases, - * but sometimes the SSSE3 version is slower than the SSE2 equivalent - * on the Atom, but is generally faster on other processors supporting - * SSSE3. This flag allows for selectively disabling certain SSSE3 - * functions on the Atom. */ - if family == 6 && model == 28 { - rval |= ATOM - } - } - } - return Flags(rval) -} - -func valAsString(values ...uint32) []byte { - r := make([]byte, 4*len(values)) - for i, v := range values { - dst := r[i*4:] - dst[0] = byte(v & 0xff) - dst[1] = byte((v >> 8) & 0xff) - dst[2] = byte((v >> 16) & 0xff) - dst[3] = byte((v >> 24) & 0xff) - switch { - case dst[0] == 0: - return r[:i*4] - case dst[1] == 0: - return r[:i*4+1] - case dst[2] == 0: - return r[:i*4+2] - case dst[3] == 0: - return r[:i*4+3] - } - } - return r -} - -// Single-precision and double-precision floating point -func (c CPUInfo) ArmFP() bool { - return c.Arm&FP != 0 -} - -// Advanced SIMD -func (c CPUInfo) ArmASIMD() bool { - return c.Arm&ASIMD != 0 -} - -// Generic timer -func (c CPUInfo) ArmEVTSTRM() bool { - return c.Arm&EVTSTRM != 0 -} - -// AES instructions -func (c CPUInfo) ArmAES() bool { - return c.Arm&AES != 0 -} - -// Polynomial Multiply instructions (PMULL/PMULL2) -func (c CPUInfo) ArmPMULL() bool { - return c.Arm&PMULL != 0 -} - -// SHA-1 instructions (SHA1C, etc) -func (c CPUInfo) ArmSHA1() bool { - return c.Arm&SHA1 != 0 -} - -// SHA-2 instructions (SHA256H, etc) -func (c CPUInfo) ArmSHA2() bool { - return c.Arm&SHA2 != 0 -} - -// CRC32/CRC32C instructions -func (c CPUInfo) ArmCRC32() bool { - return c.Arm&CRC32 != 0 -} - -// Large System Extensions (LSE) -func (c CPUInfo) ArmATOMICS() bool { - return c.Arm&ATOMICS != 0 -} - -// Half-precision floating point -func (c CPUInfo) ArmFPHP() bool { - return c.Arm&FPHP != 0 -} - -// Advanced SIMD half-precision floating point -func (c CPUInfo) ArmASIMDHP() bool { - return c.Arm&ASIMDHP != 0 -} - -// Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) -func (c CPUInfo) ArmASIMDRDM() bool { - return c.Arm&ASIMDRDM != 0 -} - -// Javascript-style double->int convert (FJCVTZS) -func (c CPUInfo) ArmJSCVT() bool { - return c.Arm&JSCVT != 0 -} - -// Floatin point complex number addition and multiplication -func (c CPUInfo) ArmFCMA() bool { - return c.Arm&FCMA != 0 -} - -// Weaker release consistency (LDAPR, etc) -func (c CPUInfo) ArmLRCPC() bool { - return c.Arm&LRCPC != 0 -} - -// Data cache clean to Point of Persistence (DC CVAP) -func (c CPUInfo) ArmDCPOP() bool { - return c.Arm&DCPOP != 0 -} - -// SHA-3 instructions (EOR3, RAXI, XAR, BCAX) -func (c CPUInfo) ArmSHA3() bool { - return c.Arm&SHA3 != 0 -} - -// SM3 instructions -func (c CPUInfo) ArmSM3() bool { - return c.Arm&SM3 != 0 -} - -// SM4 instructions -func (c CPUInfo) ArmSM4() bool { - return c.Arm&SM4 != 0 -} - -// SIMD Dot Product -func (c CPUInfo) ArmASIMDDP() bool { - return c.Arm&ASIMDDP != 0 -} - -// SHA512 instructions -func (c CPUInfo) ArmSHA512() bool { - return c.Arm&SHA512 != 0 -} - -// Scalable Vector Extension -func (c CPUInfo) ArmSVE() bool { - return c.Arm&SVE != 0 -} - -// Generic Pointer Authentication -func (c CPUInfo) ArmGPA() bool { - return c.Arm&GPA != 0 -} diff --git a/vendor/github.com/klauspost/cpuid/.gitignore b/vendor/github.com/klauspost/cpuid/v2/.gitignore index daf913b1b..daf913b1b 100644 --- a/vendor/github.com/klauspost/cpuid/.gitignore +++ b/vendor/github.com/klauspost/cpuid/v2/.gitignore diff --git a/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml b/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml new file mode 100644 index 000000000..944cc0007 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml @@ -0,0 +1,74 @@ +# This is an example goreleaser.yaml file with some sane defaults. +# Make sure to check the documentation at http://goreleaser.com + +builds: + - + id: "cpuid" + binary: cpuid + main: ./cmd/cpuid/main.go + env: + - CGO_ENABLED=0 + flags: + - -ldflags=-s -w + goos: + - aix + - linux + - freebsd + - netbsd + - windows + - darwin + goarch: + - 386 + - amd64 + - arm64 + goarm: + - 7 + +archives: + - + id: cpuid + name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}" + replacements: + aix: AIX + darwin: OSX + linux: Linux + windows: Windows + 386: i386 + amd64: x86_64 + freebsd: FreeBSD + netbsd: NetBSD + format_overrides: + - goos: windows + format: zip + files: + - LICENSE +checksum: + name_template: 'checksums.txt' +snapshot: + name_template: "{{ .Tag }}-next" +changelog: + sort: asc + filters: + exclude: + - '^doc:' + - '^docs:' + - '^test:' + - '^tests:' + - '^Update\sREADME.md' + +nfpms: + - + file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}" + vendor: Klaus Post + homepage: https://github.com/klauspost/cpuid + maintainer: Klaus Post <klauspost@gmail.com> + description: CPUID Tool + license: BSD 3-Clause + formats: + - deb + - rpm + replacements: + darwin: Darwin + linux: Linux + freebsd: FreeBSD + amd64: x86_64 diff --git a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt index 2ef4714f7..2ef4714f7 100644 --- a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt +++ b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt diff --git a/vendor/github.com/klauspost/cpuid/LICENSE b/vendor/github.com/klauspost/cpuid/v2/LICENSE index 5cec7ee94..5cec7ee94 100644 --- a/vendor/github.com/klauspost/cpuid/LICENSE +++ b/vendor/github.com/klauspost/cpuid/v2/LICENSE diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md new file mode 100644 index 000000000..ea7df3dd8 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/README.md @@ -0,0 +1,258 @@ +# cpuid +Package cpuid provides information about the CPU running the current program. + +CPU features are detected on startup, and kept for fast access through the life of the application. +Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use. + +You can access the CPU information by accessing the shared CPU variable of the cpuid library. + +Package home: https://github.com/klauspost/cpuid + +[](https://pkg.go.dev/github.com/klauspost/cpuid/v2) +[![Build Status][3]][4] + +[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master +[4]: https://travis-ci.org/klauspost/cpuid + +## installing + +`go get -u github.com/klauspost/cpuid/v2` using modules. + +Drop `v2` for others. + +## example + +```Go +package main + +import ( + "fmt" + "strings" + + . "github.com/klauspost/cpuid/v2" +) + +func main() { + // Print basic CPU information: + fmt.Println("Name:", CPU.BrandName) + fmt.Println("PhysicalCores:", CPU.PhysicalCores) + fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore) + fmt.Println("LogicalCores:", CPU.LogicalCores) + fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID) + fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ",")) + fmt.Println("Cacheline bytes:", CPU.CacheLine) + fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes") + fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes") + fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes") + fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes") + fmt.Println("Frequency", CPU.Hz, "hz") + + // Test if we have these specific features: + if CPU.Supports(SSE, SSE2) { + fmt.Println("We have Streaming SIMD 2 Extensions") + } +} +``` + +Sample output: +``` +>go run main.go +Name: AMD Ryzen 9 3950X 16-Core Processor +PhysicalCores: 16 +ThreadsPerCore: 2 +LogicalCores: 32 +Family 23 Model: 113 Vendor ID: AMD +Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3 +Cacheline bytes: 64 +L1 Data Cache: 32768 bytes +L1 Instruction Cache: 32768 bytes +L2 Cache: 524288 bytes +L3 Cache: 16777216 bytes +Frequency 0 hz +We have Streaming SIMD 2 Extensions +``` + +# usage + +The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features. +A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler. + +Note that for some cpu/os combinations some features will not be detected. +`amd64` has rather good support and should work reliably on all platforms. + +Note that hypervisors may not pass through all CPU features. + +## arm64 feature detection + +Not all operating systems provide ARM features directly +and there is no safe way to do so for the rest. + +Currently `arm64/linux` and `arm64/freebsd` should be quite reliable. +`arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected. + +A `DetectARM()` can be used if you are able to control your deployment, +it will detect CPU features, but may crash if the OS doesn't intercept the calls. +A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below. + +Note that currently only features are detected on ARM, +no additional information is currently available. + +## flags + +It is possible to add flags that affects cpu detection. + +For this the `Flags()` command is provided. + +This must be called *before* `flag.Parse()` AND after the flags have been parsed `Detect()` must be called. + +This means that any detection used in `init()` functions will not contain these flags. + +Example: + +```Go +package main + +import ( + "flag" + "fmt" + "strings" + + "github.com/klauspost/cpuid/v2" +) + +func main() { + cpuid.Flags() + flag.Parse() + cpuid.Detect() + + // Test if we have these specific features: + if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) { + fmt.Println("We have Streaming SIMD 2 Extensions") + } +} +``` + +## commandline + +Download as binary from: https://github.com/klauspost/cpuid/releases + +Install from source: + +`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest` + +### Example + +``` +λ cpuid +Name: AMD Ryzen 9 3950X 16-Core Processor +Vendor String: AuthenticAMD +Vendor ID: AMD +PhysicalCores: 16 +Threads Per Core: 2 +Logical Cores: 32 +CPU Family 23 Model: 113 +Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE +Microarchitecture level: 3 +Cacheline bytes: 64 +L1 Instruction Cache: 32768 bytes +L1 Data Cache: 32768 bytes +L2 Cache: 524288 bytes +L3 Cache: 16777216 bytes + +``` +### JSON Output: + +``` +λ cpuid --json +{ + "BrandName": "AMD Ryzen 9 3950X 16-Core Processor", + "VendorID": 2, + "VendorString": "AuthenticAMD", + "PhysicalCores": 16, + "ThreadsPerCore": 2, + "LogicalCores": 32, + "Family": 23, + "Model": 113, + "CacheLine": 64, + "Hz": 0, + "BoostFreq": 0, + "Cache": { + "L1I": 32768, + "L1D": 32768, + "L2": 524288, + "L3": 16777216 + }, + "SGX": { + "Available": false, + "LaunchControl": false, + "SGX1Supported": false, + "SGX2Supported": false, + "MaxEnclaveSizeNot64": 0, + "MaxEnclaveSize64": 0, + "EPCSections": null + }, + "Features": [ + "ADX", + "AESNI", + "AVX", + "AVX2", + "BMI1", + "BMI2", + "CLMUL", + "CLZERO", + "CMOV", + "CMPXCHG8", + "CPBOOST", + "CX16", + "F16C", + "FMA3", + "FXSR", + "FXSROPT", + "HTT", + "HYPERVISOR", + "LAHF", + "LZCNT", + "MCAOVERFLOW", + "MMX", + "MMXEXT", + "MOVBE", + "NX", + "OSXSAVE", + "POPCNT", + "RDRAND", + "RDSEED", + "RDTSCP", + "SCE", + "SHA", + "SSE", + "SSE2", + "SSE3", + "SSE4", + "SSE42", + "SSE4A", + "SSSE3", + "SUCCOR", + "X87", + "XSAVE" + ], + "X64Level": 3 +} +``` + +### Check CPU microarch level + +``` +λ cpuid --check-level=3 +2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor +2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3. +Exit Code 0 + +λ cpuid --check-level=4 +2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor +2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3. +Exit Code 1 +``` + +# license + +This code is published under an MIT license. See LICENSE file for more information. diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go new file mode 100644 index 000000000..27f33250e --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go @@ -0,0 +1,1291 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +// Package cpuid provides information about the CPU running the current program. +// +// CPU features are detected on startup, and kept for fast access through the life of the application. +// Currently x86 / x64 (AMD64) as well as arm64 is supported. +// +// You can access the CPU information by accessing the shared CPU variable of the cpuid library. +// +// Package home: https://github.com/klauspost/cpuid +package cpuid + +import ( + "flag" + "fmt" + "math" + "math/bits" + "os" + "runtime" + "strings" +) + +// AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf +// and Processor Programming Reference (PPR) + +// Vendor is a representation of a CPU vendor. +type Vendor int + +const ( + VendorUnknown Vendor = iota + Intel + AMD + VIA + Transmeta + NSC + KVM // Kernel-based Virtual Machine + MSVM // Microsoft Hyper-V or Windows Virtual PC + VMware + XenHVM + Bhyve + Hygon + SiS + RDC + + Ampere + ARM + Broadcom + Cavium + DEC + Fujitsu + Infineon + Motorola + NVIDIA + AMCC + Qualcomm + Marvell + + lastVendor +) + +//go:generate stringer -type=FeatureID,Vendor + +// FeatureID is the ID of a specific cpu feature. +type FeatureID int + +const ( + // Keep index -1 as unknown + UNKNOWN = -1 + + // Add features + ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + AESNI // Advanced Encryption Standard New Instructions + AMD3DNOW // AMD 3DNOW + AMD3DNOWEXT // AMD 3DNowExt + AMXBF16 // Tile computational operations on BFLOAT16 numbers + AMXINT8 // Tile computational operations on 8-bit integers + AMXTILE // Tile architecture + AVX // AVX functions + AVX2 // AVX2 functions + AVX512BF16 // AVX-512 BFLOAT16 Instructions + AVX512BITALG // AVX-512 Bit Algorithms + AVX512BW // AVX-512 Byte and Word Instructions + AVX512CD // AVX-512 Conflict Detection Instructions + AVX512DQ // AVX-512 Doubleword and Quadword Instructions + AVX512ER // AVX-512 Exponential and Reciprocal Instructions + AVX512F // AVX-512 Foundation + AVX512FP16 // AVX-512 FP16 Instructions + AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions + AVX512PF // AVX-512 Prefetch Instructions + AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions + AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 + AVX512VL // AVX-512 Vector Length Extensions + AVX512VNNI // AVX-512 Vector Neural Network Instructions + AVX512VP2INTERSECT // AVX-512 Intersect for D/Q + AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword + AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one + AVXVNNI // AVX (VEX encoded) VNNI neural network instructions + BMI1 // Bit Manipulation Instruction Set 1 + BMI2 // Bit Manipulation Instruction Set 2 + CETIBT // Intel CET Indirect Branch Tracking + CETSS // Intel CET Shadow Stack + CLDEMOTE // Cache Line Demote + CLMUL // Carry-less Multiplication + CLZERO // CLZERO instruction supported + CMOV // i686 CMOV + CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB + CMPXCHG8 // CMPXCHG8 instruction + CPBOOST // Core Performance Boost + CX16 // CMPXCHG16B Instruction + ENQCMD // Enqueue Command + ERMS // Enhanced REP MOVSB/STOSB + F16C // Half-precision floating-point conversion + FMA3 // Intel FMA 3. Does not imply AVX. + FMA4 // Bulldozer FMA4 functions + FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9 + FXSROPT // FXSAVE/FXRSTOR optimizations + GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. + HLE // Hardware Lock Elision + HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR + HTT // Hyperthreading (enabled) + HWA // Hardware assert supported. Indicates support for MSRC001_10 + HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors + IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) + IBS // Instruction Based Sampling (AMD) + IBSBRNTRGT // Instruction Based Sampling Feature (AMD) + IBSFETCHSAM // Instruction Based Sampling Feature (AMD) + IBSFFV // Instruction Based Sampling Feature (AMD) + IBSOPCNT // Instruction Based Sampling Feature (AMD) + IBSOPCNTEXT // Instruction Based Sampling Feature (AMD) + IBSOPSAM // Instruction Based Sampling Feature (AMD) + IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) + IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) + IBS_PREVENTHOST // Disallowing IBS use by the host supported + INT_WBINVD // WBINVD/WBNOINVD are interruptible. + INVLPGB // NVLPGB and TLBSYNC instruction supported + LAHF // LAHF/SAHF in long mode + LAM // If set, CPU supports Linear Address Masking + LBRVIRT // LBR virtualization + LZCNT // LZCNT instruction + MCAOVERFLOW // MCA overflow recovery support. + MCOMMIT // MCOMMIT instruction supported + MMX // standard MMX + MMXEXT // SSE integer functions or AMD MMX ext + MOVBE // MOVBE instruction (big-endian) + MOVDIR64B // Move 64 Bytes as Direct Store + MOVDIRI // Move Doubleword as Direct Store + MOVSB_ZL // Fast Zero-Length MOVSB + MPX // Intel MPX (Memory Protection Extensions) + MSRIRC // Instruction Retired Counter MSR available + MSR_PAGEFLUSH // Page Flush MSR available + NRIPS // Indicates support for NRIP save on VMEXIT + NX // NX (No-Execute) bit + OSXSAVE // XSAVE enabled by OS + PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption + POPCNT // POPCNT instruction + RDPRU // RDPRU instruction supported + RDRAND // RDRAND instruction is available + RDSEED // RDSEED instruction is available + RDTSCP // RDTSCP Instruction + RTM // Restricted Transactional Memory + RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. + SERIALIZE // Serialize Instruction Execution + SEV // AMD Secure Encrypted Virtualization supported + SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host + SEV_ALTERNATIVE // AMD SEV Alternate Injection supported + SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests + SEV_ES // AMD SEV Encrypted State supported + SEV_RESTRICTED // AMD SEV Restricted Injection supported + SEV_SNP // AMD SEV Secure Nested Paging supported + SGX // Software Guard Extensions + SGXLC // Software Guard Extensions Launch Control + SHA // Intel SHA Extensions + SME // AMD Secure Memory Encryption supported + SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced + SSE // SSE functions + SSE2 // P4 SSE functions + SSE3 // Prescott SSE3 functions + SSE4 // Penryn SSE4.1 functions + SSE42 // Nehalem SSE4.2 functions + SSE4A // AMD Barcelona microarchitecture SSE4a instructions + SSSE3 // Conroe SSSE3 functions + STIBP // Single Thread Indirect Branch Predictors + STOSB_SHORT // Fast short STOSB + SUCCOR // Software uncorrectable error containment and recovery capability. + SVM // AMD Secure Virtual Machine + SVMDA // Indicates support for the SVM decode assists. + SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control + SVML // AMD SVM lock. Indicates support for SVM-Lock. + SVMNP // AMD SVM nested paging + SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter + SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold + SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. + SYSEE // SYSENTER and SYSEXIT instructions + TBM // AMD Trailing Bit Manipulation + TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. + TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. + TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 + TSXLDTRK // Intel TSX Suspend Load Address Tracking + VAES // Vector AES. AVX(512) versions requires additional checks. + VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits. + VMPL // AMD VM Permission Levels supported + VMSA_REGPROT // AMD VMSA Register Protection supported + VMX // Virtual Machine Extensions + VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. + VTE // AMD Virtual Transparent Encryption supported + WAITPKG // TPAUSE, UMONITOR, UMWAIT + WBNOINVD // Write Back and Do Not Invalidate Cache + X87 // FPU + XGETBV1 // Supports XGETBV with ECX = 1 + XOP // Bulldozer XOP functions + XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV + XSAVEC // Supports XSAVEC and the compacted form of XRSTOR. + XSAVEOPT // XSAVEOPT available + XSAVES // Supports XSAVES/XRSTORS and IA32_XSS + + // ARM features: + AESARM // AES instructions + ARMCPUID // Some CPU ID registers readable at user-level + ASIMD // Advanced SIMD + ASIMDDP // SIMD Dot Product + ASIMDHP // Advanced SIMD half-precision floating point + ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) + ATOMICS // Large System Extensions (LSE) + CRC32 // CRC32/CRC32C instructions + DCPOP // Data cache clean to Point of Persistence (DC CVAP) + EVTSTRM // Generic timer + FCMA // Floatin point complex number addition and multiplication + FP // Single-precision and double-precision floating point + FPHP // Half-precision floating point + GPA // Generic Pointer Authentication + JSCVT // Javascript-style double->int convert (FJCVTZS) + LRCPC // Weaker release consistency (LDAPR, etc) + PMULL // Polynomial Multiply instructions (PMULL/PMULL2) + SHA1 // SHA-1 instructions (SHA1C, etc) + SHA2 // SHA-2 instructions (SHA256H, etc) + SHA3 // SHA-3 instructions (EOR3, RAXI, XAR, BCAX) + SHA512 // SHA512 instructions + SM3 // SM3 instructions + SM4 // SM4 instructions + SVE // Scalable Vector Extension + // Keep it last. It automatically defines the size of []flagSet + lastID + + firstID FeatureID = UNKNOWN + 1 +) + +// CPUInfo contains information about the detected system CPU. +type CPUInfo struct { + BrandName string // Brand name reported by the CPU + VendorID Vendor // Comparable CPU vendor ID + VendorString string // Raw vendor string. + featureSet flagSet // Features of the CPU + PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. + ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. + LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. + Family int // CPU family number + Model int // CPU model number + Stepping int // CPU stepping info + CacheLine int // Cache line size in bytes. Will be 0 if undetectable. + Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed. + BoostFreq int64 // Max clock speed, if known, 0 otherwise + Cache struct { + L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected + L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected + L2 int // L2 Cache (per core or shared). Will be -1 if undetected + L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected + } + SGX SGXSupport + maxFunc uint32 + maxExFunc uint32 +} + +var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) +var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) +var xgetbv func(index uint32) (eax, edx uint32) +var rdtscpAsm func() (eax, ebx, ecx, edx uint32) +var darwinHasAVX512 = func() bool { return false } + +// CPU contains information about the CPU as detected on startup, +// or when Detect last was called. +// +// Use this as the primary entry point to you data. +var CPU CPUInfo + +func init() { + initCPU() + Detect() +} + +// Detect will re-detect current CPU info. +// This will replace the content of the exported CPU variable. +// +// Unless you expect the CPU to change while you are running your program +// you should not need to call this function. +// If you call this, you must ensure that no other goroutine is accessing the +// exported CPU variable. +func Detect() { + // Set defaults + CPU.ThreadsPerCore = 1 + CPU.Cache.L1I = -1 + CPU.Cache.L1D = -1 + CPU.Cache.L2 = -1 + CPU.Cache.L3 = -1 + safe := true + if detectArmFlag != nil { + safe = !*detectArmFlag + } + addInfo(&CPU, safe) + if displayFeats != nil && *displayFeats { + fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ",")) + // Exit with non-zero so tests will print value. + os.Exit(1) + } + if disableFlag != nil { + s := strings.Split(*disableFlag, ",") + for _, feat := range s { + feat := ParseFeature(strings.TrimSpace(feat)) + if feat != UNKNOWN { + CPU.featureSet.unset(feat) + } + } + } +} + +// DetectARM will detect ARM64 features. +// This is NOT done automatically since it can potentially crash +// if the OS does not handle the command. +// If in the future this can be done safely this function may not +// do anything. +func DetectARM() { + addInfo(&CPU, false) +} + +var detectArmFlag *bool +var displayFeats *bool +var disableFlag *string + +// Flags will enable flags. +// This must be called *before* flag.Parse AND +// Detect must be called after the flags have been parsed. +// Note that this means that any detection used in init() functions +// will not contain these flags. +func Flags() { + disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list") + displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits") + detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash") +} + +// Supports returns whether the CPU supports all of the requested features. +func (c CPUInfo) Supports(ids ...FeatureID) bool { + for _, id := range ids { + if !c.featureSet.inSet(id) { + return false + } + } + return true +} + +// Has allows for checking a single feature. +// Should be inlined by the compiler. +func (c CPUInfo) Has(id FeatureID) bool { + return c.featureSet.inSet(id) +} + +// AnyOf returns whether the CPU supports one or more of the requested features. +func (c CPUInfo) AnyOf(ids ...FeatureID) bool { + for _, id := range ids { + if c.featureSet.inSet(id) { + return true + } + } + return false +} + +// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels +var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2) +var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) +var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) +var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) + +// X64Level returns the microarchitecture level detected on the CPU. +// If features are lacking or non x64 mode, 0 is returned. +// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels +func (c CPUInfo) X64Level() int { + if c.featureSet.hasSet(level4Features) { + return 4 + } + if c.featureSet.hasSet(level3Features) { + return 3 + } + if c.featureSet.hasSet(level2Features) { + return 2 + } + if c.featureSet.hasSet(level1Features) { + return 1 + } + return 0 +} + +// Disable will disable one or several features. +func (c *CPUInfo) Disable(ids ...FeatureID) bool { + for _, id := range ids { + c.featureSet.unset(id) + } + return true +} + +// Enable will disable one or several features even if they were undetected. +// This is of course not recommended for obvious reasons. +func (c *CPUInfo) Enable(ids ...FeatureID) bool { + for _, id := range ids { + c.featureSet.set(id) + } + return true +} + +// IsVendor returns true if vendor is recognized as Intel +func (c CPUInfo) IsVendor(v Vendor) bool { + return c.VendorID == v +} + +// FeatureSet returns all available features as strings. +func (c CPUInfo) FeatureSet() []string { + s := make([]string, 0, c.featureSet.nEnabled()) + s = append(s, c.featureSet.Strings()...) + return s +} + +// RTCounter returns the 64-bit time-stamp counter +// Uses the RDTSCP instruction. The value 0 is returned +// if the CPU does not support the instruction. +func (c CPUInfo) RTCounter() uint64 { + if !c.Supports(RDTSCP) { + return 0 + } + a, _, _, d := rdtscpAsm() + return uint64(a) | (uint64(d) << 32) +} + +// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. +// This variable is OS dependent, but on Linux contains information +// about the current cpu/core the code is running on. +// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. +func (c CPUInfo) Ia32TscAux() uint32 { + if !c.Supports(RDTSCP) { + return 0 + } + _, _, ecx, _ := rdtscpAsm() + return ecx +} + +// LogicalCPU will return the Logical CPU the code is currently executing on. +// This is likely to change when the OS re-schedules the running thread +// to another CPU. +// If the current core cannot be detected, -1 will be returned. +func (c CPUInfo) LogicalCPU() int { + if c.maxFunc < 1 { + return -1 + } + _, ebx, _, _ := cpuid(1) + return int(ebx >> 24) +} + +// frequencies tries to compute the clock speed of the CPU. If leaf 15 is +// supported, use it, otherwise parse the brand string. Yes, really. +func (c *CPUInfo) frequencies() { + c.Hz, c.BoostFreq = 0, 0 + mfi := maxFunctionID() + if mfi >= 0x15 { + eax, ebx, ecx, _ := cpuid(0x15) + if eax != 0 && ebx != 0 && ecx != 0 { + c.Hz = (int64(ecx) * int64(ebx)) / int64(eax) + } + } + if mfi >= 0x16 { + a, b, _, _ := cpuid(0x16) + // Base... + if a&0xffff > 0 { + c.Hz = int64(a&0xffff) * 1_000_000 + } + // Boost... + if b&0xffff > 0 { + c.BoostFreq = int64(b&0xffff) * 1_000_000 + } + } + if c.Hz > 0 { + return + } + + // computeHz determines the official rated speed of a CPU from its brand + // string. This insanity is *actually the official documented way to do + // this according to Intel*, prior to leaf 0x15 existing. The official + // documentation only shows this working for exactly `x.xx` or `xxxx` + // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other + // sizes. + model := c.BrandName + hz := strings.LastIndex(model, "Hz") + if hz < 3 { + return + } + var multiplier int64 + switch model[hz-1] { + case 'M': + multiplier = 1000 * 1000 + case 'G': + multiplier = 1000 * 1000 * 1000 + case 'T': + multiplier = 1000 * 1000 * 1000 * 1000 + } + if multiplier == 0 { + return + } + freq := int64(0) + divisor := int64(0) + decimalShift := int64(1) + var i int + for i = hz - 2; i >= 0 && model[i] != ' '; i-- { + if model[i] >= '0' && model[i] <= '9' { + freq += int64(model[i]-'0') * decimalShift + decimalShift *= 10 + } else if model[i] == '.' { + if divisor != 0 { + return + } + divisor = decimalShift + } else { + return + } + } + // we didn't find a space + if i < 0 { + return + } + if divisor != 0 { + c.Hz = (freq * multiplier) / divisor + return + } + c.Hz = freq * multiplier +} + +// VM Will return true if the cpu id indicates we are in +// a virtual machine. +func (c CPUInfo) VM() bool { + return CPU.featureSet.inSet(HYPERVISOR) +} + +// flags contains detected cpu features and characteristics +type flags uint64 + +// log2(bits_in_uint64) +const flagBitsLog2 = 6 +const flagBits = 1 << flagBitsLog2 +const flagMask = flagBits - 1 + +// flagSet contains detected cpu features and characteristics in an array of flags +type flagSet [(lastID + flagMask) / flagBits]flags + +func (s flagSet) inSet(feat FeatureID) bool { + return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 +} + +func (s *flagSet) set(feat FeatureID) { + s[feat>>flagBitsLog2] |= 1 << (feat & flagMask) +} + +// setIf will set a feature if boolean is true. +func (s *flagSet) setIf(cond bool, features ...FeatureID) { + if cond { + for _, offset := range features { + s[offset>>flagBitsLog2] |= 1 << (offset & flagMask) + } + } +} + +func (s *flagSet) unset(offset FeatureID) { + bit := flags(1 << (offset & flagMask)) + s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit +} + +// or with another flagset. +func (s *flagSet) or(other flagSet) { + for i, v := range other[:] { + s[i] |= v + } +} + +// hasSet returns whether all features are present. +func (s flagSet) hasSet(other flagSet) bool { + for i, v := range other[:] { + if s[i]&v != v { + return false + } + } + return true +} + +// nEnabled will return the number of enabled flags. +func (s flagSet) nEnabled() (n int) { + for _, v := range s[:] { + n += bits.OnesCount64(uint64(v)) + } + return n +} + +func flagSetWith(feat ...FeatureID) flagSet { + var res flagSet + for _, f := range feat { + res.set(f) + } + return res +} + +// ParseFeature will parse the string and return the ID of the matching feature. +// Will return UNKNOWN if not found. +func ParseFeature(s string) FeatureID { + s = strings.ToUpper(s) + for i := firstID; i < lastID; i++ { + if i.String() == s { + return i + } + } + return UNKNOWN +} + +// Strings returns an array of the detected features for FlagsSet. +func (s flagSet) Strings() []string { + if len(s) == 0 { + return []string{""} + } + r := make([]string, 0) + for i := firstID; i < lastID; i++ { + if s.inSet(i) { + r = append(r, i.String()) + } + } + return r +} + +func maxExtendedFunction() uint32 { + eax, _, _, _ := cpuid(0x80000000) + return eax +} + +func maxFunctionID() uint32 { + a, _, _, _ := cpuid(0) + return a +} + +func brandName() string { + if maxExtendedFunction() >= 0x80000004 { + v := make([]uint32, 0, 48) + for i := uint32(0); i < 3; i++ { + a, b, c, d := cpuid(0x80000002 + i) + v = append(v, a, b, c, d) + } + return strings.Trim(string(valAsString(v...)), " ") + } + return "unknown" +} + +func threadsPerCore() int { + mfi := maxFunctionID() + vend, _ := vendorID() + + if mfi < 0x4 || (vend != Intel && vend != AMD) { + return 1 + } + + if mfi < 0xb { + if vend != Intel { + return 1 + } + _, b, _, d := cpuid(1) + if (d & (1 << 28)) != 0 { + // v will contain logical core count + v := (b >> 16) & 255 + if v > 1 { + a4, _, _, _ := cpuid(4) + // physical cores + v2 := (a4 >> 26) + 1 + if v2 > 0 { + return int(v) / int(v2) + } + } + } + return 1 + } + _, b, _, _ := cpuidex(0xb, 0) + if b&0xffff == 0 { + if vend == AMD { + // Workaround for AMD returning 0, assume 2 if >= Zen 2 + // It will be more correct than not. + fam, _, _ := familyModel() + _, _, _, d := cpuid(1) + if (d&(1<<28)) != 0 && fam >= 23 { + return 2 + } + } + return 1 + } + return int(b & 0xffff) +} + +func logicalCores() int { + mfi := maxFunctionID() + v, _ := vendorID() + switch v { + case Intel: + // Use this on old Intel processors + if mfi < 0xb { + if mfi < 1 { + return 0 + } + // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) + // that can be assigned to logical processors in a physical package. + // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. + _, ebx, _, _ := cpuid(1) + logical := (ebx >> 16) & 0xff + return int(logical) + } + _, b, _, _ := cpuidex(0xb, 1) + return int(b & 0xffff) + case AMD, Hygon: + _, b, _, _ := cpuid(1) + return int((b >> 16) & 0xff) + default: + return 0 + } +} + +func familyModel() (family, model, stepping int) { + if maxFunctionID() < 0x1 { + return 0, 0, 0 + } + eax, _, _, _ := cpuid(1) + // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0]. + family = int((eax >> 8) & 0xf) + extFam := family == 0x6 // Intel is 0x6, needs extended model. + if family == 0xf { + // Add ExtFamily + family += int((eax >> 20) & 0xff) + extFam = true + } + // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0]. + model = int((eax >> 4) & 0xf) + if extFam { + // Add ExtModel + model += int((eax >> 12) & 0xf0) + } + stepping = int(eax & 0xf) + return family, model, stepping +} + +func physicalCores() int { + v, _ := vendorID() + switch v { + case Intel: + return logicalCores() / threadsPerCore() + case AMD, Hygon: + lc := logicalCores() + tpc := threadsPerCore() + if lc > 0 && tpc > 0 { + return lc / tpc + } + + // The following is inaccurate on AMD EPYC 7742 64-Core Processor + if maxExtendedFunction() >= 0x80000008 { + _, _, c, _ := cpuid(0x80000008) + if c&0xff > 0 { + return int(c&0xff) + 1 + } + } + } + return 0 +} + +// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID +var vendorMapping = map[string]Vendor{ + "AMDisbetter!": AMD, + "AuthenticAMD": AMD, + "CentaurHauls": VIA, + "GenuineIntel": Intel, + "TransmetaCPU": Transmeta, + "GenuineTMx86": Transmeta, + "Geode by NSC": NSC, + "VIA VIA VIA ": VIA, + "KVMKVMKVMKVM": KVM, + "Microsoft Hv": MSVM, + "VMwareVMware": VMware, + "XenVMMXenVMM": XenHVM, + "bhyve bhyve ": Bhyve, + "HygonGenuine": Hygon, + "Vortex86 SoC": SiS, + "SiS SiS SiS ": SiS, + "RiseRiseRise": SiS, + "Genuine RDC": RDC, +} + +func vendorID() (Vendor, string) { + _, b, c, d := cpuid(0) + v := string(valAsString(b, d, c)) + vend, ok := vendorMapping[v] + if !ok { + return VendorUnknown, v + } + return vend, v +} + +func cacheLine() int { + if maxFunctionID() < 0x1 { + return 0 + } + + _, ebx, _, _ := cpuid(1) + cache := (ebx & 0xff00) >> 5 // cflush size + if cache == 0 && maxExtendedFunction() >= 0x80000006 { + _, _, ecx, _ := cpuid(0x80000006) + cache = ecx & 0xff // cacheline size + } + // TODO: Read from Cache and TLB Information + return int(cache) +} + +func (c *CPUInfo) cacheSize() { + c.Cache.L1D = -1 + c.Cache.L1I = -1 + c.Cache.L2 = -1 + c.Cache.L3 = -1 + vendor, _ := vendorID() + switch vendor { + case Intel: + if maxFunctionID() < 4 { + return + } + c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0 + for i := uint32(0); ; i++ { + eax, ebx, ecx, _ := cpuidex(4, i) + cacheType := eax & 15 + if cacheType == 0 { + break + } + cacheLevel := (eax >> 5) & 7 + coherency := int(ebx&0xfff) + 1 + partitions := int((ebx>>12)&0x3ff) + 1 + associativity := int((ebx>>22)&0x3ff) + 1 + sets := int(ecx) + 1 + size := associativity * partitions * coherency * sets + switch cacheLevel { + case 1: + if cacheType == 1 { + // 1 = Data Cache + c.Cache.L1D = size + } else if cacheType == 2 { + // 2 = Instruction Cache + c.Cache.L1I = size + } else { + if c.Cache.L1D < 0 { + c.Cache.L1I = size + } + if c.Cache.L1I < 0 { + c.Cache.L1I = size + } + } + case 2: + c.Cache.L2 = size + case 3: + c.Cache.L3 = size + } + } + case AMD, Hygon: + // Untested. + if maxExtendedFunction() < 0x80000005 { + return + } + _, _, ecx, edx := cpuid(0x80000005) + c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) + c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) + + if maxExtendedFunction() < 0x80000006 { + return + } + _, _, ecx, _ = cpuid(0x80000006) + c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) + + // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties + if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) { + return + } + + // Xen Hypervisor is buggy and returns the same entry no matter ECX value. + // Hack: When we encounter the same entry 100 times we break. + nSame := 0 + var last uint32 + for i := uint32(0); i < math.MaxUint32; i++ { + eax, ebx, ecx, _ := cpuidex(0x8000001D, i) + + level := (eax >> 5) & 7 + cacheNumSets := ecx + 1 + cacheLineSize := 1 + (ebx & 2047) + cachePhysPartitions := 1 + ((ebx >> 12) & 511) + cacheNumWays := 1 + ((ebx >> 22) & 511) + + typ := eax & 15 + size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) + if typ == 0 { + return + } + + // Check for the same value repeated. + comb := eax ^ ebx ^ ecx + if comb == last { + nSame++ + if nSame == 100 { + return + } + } + last = comb + + switch level { + case 1: + switch typ { + case 1: + // Data cache + c.Cache.L1D = size + case 2: + // Inst cache + c.Cache.L1I = size + default: + if c.Cache.L1D < 0 { + c.Cache.L1I = size + } + if c.Cache.L1I < 0 { + c.Cache.L1I = size + } + } + case 2: + c.Cache.L2 = size + case 3: + c.Cache.L3 = size + } + } + } +} + +type SGXEPCSection struct { + BaseAddress uint64 + EPCSize uint64 +} + +type SGXSupport struct { + Available bool + LaunchControl bool + SGX1Supported bool + SGX2Supported bool + MaxEnclaveSizeNot64 int64 + MaxEnclaveSize64 int64 + EPCSections []SGXEPCSection +} + +func hasSGX(available, lc bool) (rval SGXSupport) { + rval.Available = available + + if !available { + return + } + + rval.LaunchControl = lc + + a, _, _, d := cpuidex(0x12, 0) + rval.SGX1Supported = a&0x01 != 0 + rval.SGX2Supported = a&0x02 != 0 + rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 + rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 + rval.EPCSections = make([]SGXEPCSection, 0) + + for subleaf := uint32(2); subleaf < 2+8; subleaf++ { + eax, ebx, ecx, edx := cpuidex(0x12, subleaf) + leafType := eax & 0xf + + if leafType == 0 { + // Invalid subleaf, stop iterating + break + } else if leafType == 1 { + // EPC Section subleaf + baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) + size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) + + section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} + rval.EPCSections = append(rval.EPCSections, section) + } + } + + return +} + +func support() flagSet { + var fs flagSet + mfi := maxFunctionID() + vend, _ := vendorID() + if mfi < 0x1 { + return fs + } + family, model, _ := familyModel() + + _, _, c, d := cpuid(1) + fs.setIf((d&(1<<0)) != 0, X87) + fs.setIf((d&(1<<8)) != 0, CMPXCHG8) + fs.setIf((d&(1<<11)) != 0, SYSEE) + fs.setIf((d&(1<<15)) != 0, CMOV) + fs.setIf((d&(1<<23)) != 0, MMX) + fs.setIf((d&(1<<24)) != 0, FXSR) + fs.setIf((d&(1<<25)) != 0, FXSROPT) + fs.setIf((d&(1<<25)) != 0, SSE) + fs.setIf((d&(1<<26)) != 0, SSE2) + fs.setIf((c&1) != 0, SSE3) + fs.setIf((c&(1<<5)) != 0, VMX) + fs.setIf((c&(1<<9)) != 0, SSSE3) + fs.setIf((c&(1<<19)) != 0, SSE4) + fs.setIf((c&(1<<20)) != 0, SSE42) + fs.setIf((c&(1<<25)) != 0, AESNI) + fs.setIf((c&(1<<1)) != 0, CLMUL) + fs.setIf(c&(1<<22) != 0, MOVBE) + fs.setIf(c&(1<<23) != 0, POPCNT) + fs.setIf(c&(1<<30) != 0, RDRAND) + + // This bit has been reserved by Intel & AMD for use by hypervisors, + // and indicates the presence of a hypervisor. + fs.setIf(c&(1<<31) != 0, HYPERVISOR) + fs.setIf(c&(1<<29) != 0, F16C) + fs.setIf(c&(1<<13) != 0, CX16) + + if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { + fs.setIf(threadsPerCore() > 1, HTT) + } + if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { + fs.setIf(threadsPerCore() > 1, HTT) + } + fs.setIf(c&1<<26 != 0, XSAVE) + fs.setIf(c&1<<27 != 0, OSXSAVE) + // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits + const avxCheck = 1<<26 | 1<<27 | 1<<28 + if c&avxCheck == avxCheck { + // Check for OS support + eax, _ := xgetbv(0) + if (eax & 0x6) == 0x6 { + fs.set(AVX) + switch vend { + case Intel: + // Older than Haswell. + fs.setIf(family == 6 && model < 60, AVXSLOW) + case AMD: + // Older than Zen 2 + fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW) + } + } + } + // FMA3 can be used with SSE registers, so no OS support is strictly needed. + // fma3 and OSXSAVE needed. + const fma3Check = 1<<12 | 1<<27 + fs.setIf(c&fma3Check == fma3Check, FMA3) + + // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. + if mfi >= 7 { + _, ebx, ecx, edx := cpuidex(7, 0) + if fs.inSet(AVX) && (ebx&0x00000020) != 0 { + fs.set(AVX2) + } + // CPUID.(EAX=7, ECX=0).EBX + if (ebx & 0x00000008) != 0 { + fs.set(BMI1) + fs.setIf((ebx&0x00000100) != 0, BMI2) + } + fs.setIf(ebx&(1<<2) != 0, SGX) + fs.setIf(ebx&(1<<4) != 0, HLE) + fs.setIf(ebx&(1<<9) != 0, ERMS) + fs.setIf(ebx&(1<<11) != 0, RTM) + fs.setIf(ebx&(1<<14) != 0, MPX) + fs.setIf(ebx&(1<<18) != 0, RDSEED) + fs.setIf(ebx&(1<<19) != 0, ADX) + fs.setIf(ebx&(1<<29) != 0, SHA) + + // CPUID.(EAX=7, ECX=0).ECX + fs.setIf(ecx&(1<<5) != 0, WAITPKG) + fs.setIf(ecx&(1<<7) != 0, CETSS) + fs.setIf(ecx&(1<<8) != 0, GFNI) + fs.setIf(ecx&(1<<9) != 0, VAES) + fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) + fs.setIf(ecx&(1<<13) != 0, TME) + fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) + fs.setIf(ecx&(1<<27) != 0, MOVDIRI) + fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) + fs.setIf(ecx&(1<<29) != 0, ENQCMD) + fs.setIf(ecx&(1<<30) != 0, SGXLC) + + // CPUID.(EAX=7, ECX=0).EDX + fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) + fs.setIf(edx&(1<<14) != 0, SERIALIZE) + fs.setIf(edx&(1<<16) != 0, TSXLDTRK) + fs.setIf(edx&(1<<18) != 0, PCONFIG) + fs.setIf(edx&(1<<20) != 0, CETIBT) + fs.setIf(edx&(1<<26) != 0, IBPB) + fs.setIf(edx&(1<<27) != 0, STIBP) + + // CPUID.(EAX=7, ECX=1) + eax1, _, _, _ := cpuidex(7, 1) + fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) + fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) + fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) + fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) + fs.setIf(eax1&(1<<22) != 0, HRESET) + fs.setIf(eax1&(1<<26) != 0, LAM) + + // Only detect AVX-512 features if XGETBV is supported + if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { + // Check for OS support + eax, _ := xgetbv(0) + + // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and + // ZMM16-ZMM31 state are enabled by OS) + /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). + hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3 + if runtime.GOOS == "darwin" { + hasAVX512 = fs.inSet(AVX) && darwinHasAVX512() + } + if hasAVX512 { + fs.setIf(ebx&(1<<16) != 0, AVX512F) + fs.setIf(ebx&(1<<17) != 0, AVX512DQ) + fs.setIf(ebx&(1<<21) != 0, AVX512IFMA) + fs.setIf(ebx&(1<<26) != 0, AVX512PF) + fs.setIf(ebx&(1<<27) != 0, AVX512ER) + fs.setIf(ebx&(1<<28) != 0, AVX512CD) + fs.setIf(ebx&(1<<30) != 0, AVX512BW) + fs.setIf(ebx&(1<<31) != 0, AVX512VL) + // ecx + fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) + fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) + fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) + fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) + fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) + // edx + fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT) + fs.setIf(edx&(1<<22) != 0, AMXBF16) + fs.setIf(edx&(1<<23) != 0, AVX512FP16) + fs.setIf(edx&(1<<24) != 0, AMXTILE) + fs.setIf(edx&(1<<25) != 0, AMXINT8) + // eax1 = CPUID.(EAX=7, ECX=1).EAX + fs.setIf(eax1&(1<<5) != 0, AVX512BF16) + } + } + } + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) + // EAX + // Bit 00: XSAVEOPT is available. + // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set. + // Bit 02: Supports XGETBV with ECX = 1 if set. + // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set. + // Bits 31 - 04: Reserved. + // EBX + // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS. + // ECX + // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1. + // EDX? + // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved. + if mfi >= 0xd { + if fs.inSet(XSAVE) { + eax, _, _, _ := cpuidex(0xd, 1) + fs.setIf(eax&(1<<0) != 0, XSAVEOPT) + fs.setIf(eax&(1<<1) != 0, XSAVEC) + fs.setIf(eax&(1<<2) != 0, XGETBV1) + fs.setIf(eax&(1<<3) != 0, XSAVES) + } + } + if maxExtendedFunction() >= 0x80000001 { + _, _, c, d := cpuid(0x80000001) + if (c & (1 << 5)) != 0 { + fs.set(LZCNT) + fs.set(POPCNT) + } + // ECX + fs.setIf((c&(1<<0)) != 0, LAHF) + fs.setIf((c&(1<<2)) != 0, SVM) + fs.setIf((c&(1<<6)) != 0, SSE4A) + fs.setIf((c&(1<<10)) != 0, IBS) + fs.setIf((c&(1<<22)) != 0, TOPEXT) + + // EDX + fs.setIf(d&(1<<11) != 0, SYSCALL) + fs.setIf(d&(1<<20) != 0, NX) + fs.setIf(d&(1<<22) != 0, MMXEXT) + fs.setIf(d&(1<<23) != 0, MMX) + fs.setIf(d&(1<<24) != 0, FXSR) + fs.setIf(d&(1<<25) != 0, FXSROPT) + fs.setIf(d&(1<<27) != 0, RDTSCP) + fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT) + fs.setIf(d&(1<<31) != 0, AMD3DNOW) + + /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be + * used unless the OS has AVX support. */ + if fs.inSet(AVX) { + fs.setIf((c&(1<<11)) != 0, XOP) + fs.setIf((c&(1<<16)) != 0, FMA4) + } + + } + if maxExtendedFunction() >= 0x80000007 { + _, b, _, d := cpuid(0x80000007) + fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW) + fs.setIf((b&(1<<1)) != 0, SUCCOR) + fs.setIf((b&(1<<2)) != 0, HWA) + fs.setIf((d&(1<<9)) != 0, CPBOOST) + } + + if maxExtendedFunction() >= 0x80000008 { + _, b, _, _ := cpuid(0x80000008) + fs.setIf((b&(1<<9)) != 0, WBNOINVD) + fs.setIf((b&(1<<8)) != 0, MCOMMIT) + fs.setIf((b&(1<<13)) != 0, INT_WBINVD) + fs.setIf((b&(1<<4)) != 0, RDPRU) + fs.setIf((b&(1<<3)) != 0, INVLPGB) + fs.setIf((b&(1<<1)) != 0, MSRIRC) + fs.setIf((b&(1<<0)) != 0, CLZERO) + } + + if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A { + _, _, _, edx := cpuid(0x8000000A) + fs.setIf((edx>>0)&1 == 1, SVMNP) + fs.setIf((edx>>1)&1 == 1, LBRVIRT) + fs.setIf((edx>>2)&1 == 1, SVML) + fs.setIf((edx>>3)&1 == 1, NRIPS) + fs.setIf((edx>>4)&1 == 1, TSCRATEMSR) + fs.setIf((edx>>5)&1 == 1, VMCBCLEAN) + fs.setIf((edx>>6)&1 == 1, SVMFBASID) + fs.setIf((edx>>7)&1 == 1, SVMDA) + fs.setIf((edx>>10)&1 == 1, SVMPF) + fs.setIf((edx>>12)&1 == 1, SVMPFT) + } + + if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { + eax, _, _, _ := cpuid(0x8000001b) + fs.setIf((eax>>0)&1 == 1, IBSFFV) + fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM) + fs.setIf((eax>>2)&1 == 1, IBSOPSAM) + fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT) + fs.setIf((eax>>4)&1 == 1, IBSOPCNT) + fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) + fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) + fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) + } + + if maxExtendedFunction() >= 0x8000001f && vend == AMD { + a, _, _, _ := cpuid(0x8000001f) + fs.setIf((a>>0)&1 == 1, SME) + fs.setIf((a>>1)&1 == 1, SEV) + fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH) + fs.setIf((a>>3)&1 == 1, SEV_ES) + fs.setIf((a>>4)&1 == 1, SEV_SNP) + fs.setIf((a>>5)&1 == 1, VMPL) + fs.setIf((a>>10)&1 == 1, SME_COHERENT) + fs.setIf((a>>11)&1 == 1, SEV_64BIT) + fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED) + fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE) + fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP) + fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST) + fs.setIf((a>>16)&1 == 1, VTE) + fs.setIf((a>>24)&1 == 1, VMSA_REGPROT) + } + + return fs +} + +func valAsString(values ...uint32) []byte { + r := make([]byte, 4*len(values)) + for i, v := range values { + dst := r[i*4:] + dst[0] = byte(v & 0xff) + dst[1] = byte((v >> 8) & 0xff) + dst[2] = byte((v >> 16) & 0xff) + dst[3] = byte((v >> 24) & 0xff) + switch { + case dst[0] == 0: + return r[:i*4] + case dst[1] == 0: + return r[:i*4+1] + case dst[2] == 0: + return r[:i*4+2] + case dst[3] == 0: + return r[:i*4+3] + } + } + return r +} diff --git a/vendor/github.com/klauspost/cpuid/cpuid_386.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s index 089638f51..8587c3a1f 100644 --- a/vendor/github.com/klauspost/cpuid/cpuid_386.s +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s @@ -40,3 +40,8 @@ TEXT ·asmRdtscpAsm(SB), 7, $0 MOVL CX, ecx+8(FP) MOVL DX, edx+12(FP) RET + +// func asmDarwinHasAVX512() bool +TEXT ·asmDarwinHasAVX512(SB), 7, $0 + MOVL $0, eax+0(FP) + RET diff --git a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s index 3ba0559e9..bc11f8942 100644 --- a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s @@ -40,3 +40,33 @@ TEXT ·asmRdtscpAsm(SB), 7, $0 MOVL CX, ecx+8(FP) MOVL DX, edx+12(FP) RET + +// From https://go-review.googlesource.com/c/sys/+/285572/ +// func asmDarwinHasAVX512() bool +TEXT ·asmDarwinHasAVX512(SB), 7, $0-1 + MOVB $0, ret+0(FP) // default to false + +#ifdef GOOS_darwin // return if not darwin +#ifdef GOARCH_amd64 // return if not amd64 +// These values from: +// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h +#define commpage64_base_address 0x00007fffffe00000 +#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010) +#define commpage64_version (commpage64_base_address+0x01E) +#define hasAVX512F 0x0000004000000000 + MOVQ $commpage64_version, BX + MOVW (BX), AX + CMPW AX, $13 // versions < 13 do not support AVX512 + JL no_avx512 + MOVQ $commpage64_cpu_capabilities64, BX + MOVQ (BX), AX + MOVQ $hasAVX512F, CX + ANDQ CX, AX + JZ no_avx512 + MOVB $1, ret+0(FP) + +no_avx512: +#endif +#endif + RET + diff --git a/vendor/github.com/klauspost/cpuid/cpuid_arm64.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s index 8975ee8db..b31d6aec4 100644 --- a/vendor/github.com/klauspost/cpuid/cpuid_arm64.s +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s @@ -1,6 +1,6 @@ // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. -//+build arm64,!gccgo +//+build arm64,!gccgo,!noasm,!appengine // See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt diff --git a/vendor/github.com/klauspost/cpuid/detect_arm64.go b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go index 923a82618..9a53504a0 100644 --- a/vendor/github.com/klauspost/cpuid/detect_arm64.go +++ b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go @@ -1,9 +1,12 @@ // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. -//+build arm64,!gccgo,!noasm,!appengine +//go:build arm64 && !gccgo && !noasm && !appengine +// +build arm64,!gccgo,!noasm,!appengine package cpuid +import "runtime" + func getMidr() (midr uint64) func getProcFeatures() (procFeatures uint64) func getInstAttributes() (instAttrReg0, instAttrReg1 uint64) @@ -15,14 +18,19 @@ func initCPU() { rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 } } -func addInfo(c *CPUInfo) { - // ARM64 disabled for now. - if true { +func addInfo(c *CPUInfo, safe bool) { + // Seems to be safe to assume on ARM64 + c.CacheLine = 64 + detectOS(c) + + // ARM64 disabled since it may crash if interrupt is not intercepted by OS. + if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" { return } - // midr := getMidr() + midr := getMidr() // MIDR_EL1 - Main ID Register + // https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1 // x--------------------------------------------------x // | Name | bits | visible | // |--------------------------------------------------| @@ -37,11 +45,70 @@ func addInfo(c *CPUInfo) { // | Revision | [3-0] | y | // x--------------------------------------------------x - // fmt.Printf(" implementer: 0x%02x\n", (midr>>24)&0xff) - // fmt.Printf(" variant: 0x%01x\n", (midr>>20)&0xf) - // fmt.Printf("architecture: 0x%01x\n", (midr>>16)&0xf) - // fmt.Printf(" part num: 0x%03x\n", (midr>>4)&0xfff) - // fmt.Printf(" revision: 0x%01x\n", (midr>>0)&0xf) + switch (midr >> 24) & 0xff { + case 0xC0: + c.VendorString = "Ampere Computing" + c.VendorID = Ampere + case 0x41: + c.VendorString = "Arm Limited" + c.VendorID = ARM + case 0x42: + c.VendorString = "Broadcom Corporation" + c.VendorID = Broadcom + case 0x43: + c.VendorString = "Cavium Inc" + c.VendorID = Cavium + case 0x44: + c.VendorString = "Digital Equipment Corporation" + c.VendorID = DEC + case 0x46: + c.VendorString = "Fujitsu Ltd" + c.VendorID = Fujitsu + case 0x49: + c.VendorString = "Infineon Technologies AG" + c.VendorID = Infineon + case 0x4D: + c.VendorString = "Motorola or Freescale Semiconductor Inc" + c.VendorID = Motorola + case 0x4E: + c.VendorString = "NVIDIA Corporation" + c.VendorID = NVIDIA + case 0x50: + c.VendorString = "Applied Micro Circuits Corporation" + c.VendorID = AMCC + case 0x51: + c.VendorString = "Qualcomm Inc" + c.VendorID = Qualcomm + case 0x56: + c.VendorString = "Marvell International Ltd" + c.VendorID = Marvell + case 0x69: + c.VendorString = "Intel Corporation" + c.VendorID = Intel + } + + // Lower 4 bits: Architecture + // Architecture Meaning + // 0b0001 Armv4. + // 0b0010 Armv4T. + // 0b0011 Armv5 (obsolete). + // 0b0100 Armv5T. + // 0b0101 Armv5TE. + // 0b0110 Armv5TEJ. + // 0b0111 Armv6. + // 0b1111 Architectural features are individually identified in the ID_* registers, see 'ID registers'. + // Upper 4 bit: Variant + // An IMPLEMENTATION DEFINED variant number. + // Typically, this field is used to distinguish between different product variants, or major revisions of a product. + c.Family = int(midr>>16) & 0xff + + // PartNum, bits [15:4] + // An IMPLEMENTATION DEFINED primary part number for the device. + // On processors implemented by Arm, if the top four bits of the primary + // part number are 0x0 or 0x7, the variant and architecture are encoded differently. + // Revision, bits [3:0] + // An IMPLEMENTATION DEFINED revision number for the device. + c.Model = int(midr) & 0xffff procFeatures := getProcFeatures() @@ -68,25 +135,18 @@ func addInfo(c *CPUInfo) { // | EL0 | [3-0] | n | // x--------------------------------------------------x - var f ArmFlags + var f flagSet // if procFeatures&(0xf<<48) != 0 { // fmt.Println("DIT") // } - if procFeatures&(0xf<<32) != 0 { - f |= SVE - } + f.setIf(procFeatures&(0xf<<32) != 0, SVE) if procFeatures&(0xf<<20) != 15<<20 { - f |= ASIMD - if procFeatures&(0xf<<20) == 1<<20 { - // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1 - // 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic. - f |= FPHP - f |= ASIMDHP - } - } - if procFeatures&(0xf<<16) != 0 { - f |= FP + f.set(ASIMD) + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1 + // 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic. + f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP) } + f.setIf(procFeatures&(0xf<<16) != 0, FP) instAttrReg0, instAttrReg1 := getInstAttributes() @@ -127,46 +187,22 @@ func addInfo(c *CPUInfo) { // if instAttrReg0&(0xf<<48) != 0 { // fmt.Println("FHM") // } - if instAttrReg0&(0xf<<44) != 0 { - f |= ASIMDDP - } - if instAttrReg0&(0xf<<40) != 0 { - f |= SM4 - } - if instAttrReg0&(0xf<<36) != 0 { - f |= SM3 - } - if instAttrReg0&(0xf<<32) != 0 { - f |= SHA3 - } - if instAttrReg0&(0xf<<28) != 0 { - f |= ASIMDRDM - } - if instAttrReg0&(0xf<<20) != 0 { - f |= ATOMICS - } - if instAttrReg0&(0xf<<16) != 0 { - f |= CRC32 - } - if instAttrReg0&(0xf<<12) != 0 { - f |= SHA2 - } - if instAttrReg0&(0xf<<12) == 2<<12 { - // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 - // 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented. - f |= SHA512 - } - if instAttrReg0&(0xf<<8) != 0 { - f |= SHA1 - } - if instAttrReg0&(0xf<<4) != 0 { - f |= AES - } - if instAttrReg0&(0xf<<4) == 2<<4 { - // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 - // 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities. - f |= PMULL - } + f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP) + f.setIf(instAttrReg0&(0xf<<40) != 0, SM4) + f.setIf(instAttrReg0&(0xf<<36) != 0, SM3) + f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3) + f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM) + f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS) + f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32) + f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2) + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 + // 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented. + f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512) + f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1) + f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM) + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 + // 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities. + f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL) // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1 // @@ -194,26 +230,18 @@ func addInfo(c *CPUInfo) { // if instAttrReg1&(0xf<<28) != 0 { // fmt.Println("GPI") // } - if instAttrReg1&(0xf<<28) != 24 { - f |= GPA - } - if instAttrReg1&(0xf<<20) != 0 { - f |= LRCPC - } - if instAttrReg1&(0xf<<16) != 0 { - f |= FCMA - } - if instAttrReg1&(0xf<<12) != 0 { - f |= JSCVT - } + f.setIf(instAttrReg1&(0xf<<28) != 24, GPA) + f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC) + f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA) + f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT) // if instAttrReg1&(0xf<<8) != 0 { // fmt.Println("API") // } // if instAttrReg1&(0xf<<4) != 0 { // fmt.Println("APA") // } - if instAttrReg1&(0xf<<0) != 0 { - f |= DCPOP - } - c.Arm = f + f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP) + + // Store + c.featureSet.or(f) } diff --git a/vendor/github.com/klauspost/cpuid/detect_ref.go b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go index 970ff3d22..9636c2bc1 100644 --- a/vendor/github.com/klauspost/cpuid/detect_ref.go +++ b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go @@ -1,6 +1,7 @@ // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. -//+build !amd64,!386,!arm64 gccgo noasm appengine +//go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine +// +build !amd64,!386,!arm64 gccgo noasm appengine package cpuid @@ -11,4 +12,4 @@ func initCPU() { rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 } } -func addInfo(info *CPUInfo) {} +func addInfo(info *CPUInfo, safe bool) {} diff --git a/vendor/github.com/klauspost/cpuid/detect_intel.go b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go index 363951b3b..c946824ec 100644 --- a/vendor/github.com/klauspost/cpuid/detect_intel.go +++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go @@ -1,6 +1,7 @@ // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. -//+build 386,!gccgo,!noasm amd64,!gccgo,!noasm,!appengine +//go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine) +// +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine package cpuid @@ -8,26 +9,28 @@ func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32) func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) func asmXgetbv(index uint32) (eax, edx uint32) func asmRdtscpAsm() (eax, ebx, ecx, edx uint32) +func asmDarwinHasAVX512() bool func initCPU() { cpuid = asmCpuid cpuidex = asmCpuidex xgetbv = asmXgetbv rdtscpAsm = asmRdtscpAsm + darwinHasAVX512 = asmDarwinHasAVX512 } -func addInfo(c *CPUInfo) { +func addInfo(c *CPUInfo, safe bool) { c.maxFunc = maxFunctionID() c.maxExFunc = maxExtendedFunction() c.BrandName = brandName() c.CacheLine = cacheLine() - c.Family, c.Model = familyModel() - c.Features = support() - c.SGX = hasSGX(c.Features&SGX != 0, c.Features&SGXLC != 0) + c.Family, c.Model, c.Stepping = familyModel() + c.featureSet = support() + c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC)) c.ThreadsPerCore = threadsPerCore() c.LogicalCores = logicalCores() c.PhysicalCores = physicalCores() c.VendorID, c.VendorString = vendorID() - c.Hz = hertz(c.BrandName) c.cacheSize() + c.frequencies() } diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go new file mode 100644 index 000000000..d12e547c4 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go @@ -0,0 +1,235 @@ +// Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT. + +package cpuid + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[ADX-1] + _ = x[AESNI-2] + _ = x[AMD3DNOW-3] + _ = x[AMD3DNOWEXT-4] + _ = x[AMXBF16-5] + _ = x[AMXINT8-6] + _ = x[AMXTILE-7] + _ = x[AVX-8] + _ = x[AVX2-9] + _ = x[AVX512BF16-10] + _ = x[AVX512BITALG-11] + _ = x[AVX512BW-12] + _ = x[AVX512CD-13] + _ = x[AVX512DQ-14] + _ = x[AVX512ER-15] + _ = x[AVX512F-16] + _ = x[AVX512FP16-17] + _ = x[AVX512IFMA-18] + _ = x[AVX512PF-19] + _ = x[AVX512VBMI-20] + _ = x[AVX512VBMI2-21] + _ = x[AVX512VL-22] + _ = x[AVX512VNNI-23] + _ = x[AVX512VP2INTERSECT-24] + _ = x[AVX512VPOPCNTDQ-25] + _ = x[AVXSLOW-26] + _ = x[AVXVNNI-27] + _ = x[BMI1-28] + _ = x[BMI2-29] + _ = x[CETIBT-30] + _ = x[CETSS-31] + _ = x[CLDEMOTE-32] + _ = x[CLMUL-33] + _ = x[CLZERO-34] + _ = x[CMOV-35] + _ = x[CMPSB_SCADBS_SHORT-36] + _ = x[CMPXCHG8-37] + _ = x[CPBOOST-38] + _ = x[CX16-39] + _ = x[ENQCMD-40] + _ = x[ERMS-41] + _ = x[F16C-42] + _ = x[FMA3-43] + _ = x[FMA4-44] + _ = x[FXSR-45] + _ = x[FXSROPT-46] + _ = x[GFNI-47] + _ = x[HLE-48] + _ = x[HRESET-49] + _ = x[HTT-50] + _ = x[HWA-51] + _ = x[HYPERVISOR-52] + _ = x[IBPB-53] + _ = x[IBS-54] + _ = x[IBSBRNTRGT-55] + _ = x[IBSFETCHSAM-56] + _ = x[IBSFFV-57] + _ = x[IBSOPCNT-58] + _ = x[IBSOPCNTEXT-59] + _ = x[IBSOPSAM-60] + _ = x[IBSRDWROPCNT-61] + _ = x[IBSRIPINVALIDCHK-62] + _ = x[IBS_PREVENTHOST-63] + _ = x[INT_WBINVD-64] + _ = x[INVLPGB-65] + _ = x[LAHF-66] + _ = x[LAM-67] + _ = x[LBRVIRT-68] + _ = x[LZCNT-69] + _ = x[MCAOVERFLOW-70] + _ = x[MCOMMIT-71] + _ = x[MMX-72] + _ = x[MMXEXT-73] + _ = x[MOVBE-74] + _ = x[MOVDIR64B-75] + _ = x[MOVDIRI-76] + _ = x[MOVSB_ZL-77] + _ = x[MPX-78] + _ = x[MSRIRC-79] + _ = x[MSR_PAGEFLUSH-80] + _ = x[NRIPS-81] + _ = x[NX-82] + _ = x[OSXSAVE-83] + _ = x[PCONFIG-84] + _ = x[POPCNT-85] + _ = x[RDPRU-86] + _ = x[RDRAND-87] + _ = x[RDSEED-88] + _ = x[RDTSCP-89] + _ = x[RTM-90] + _ = x[RTM_ALWAYS_ABORT-91] + _ = x[SERIALIZE-92] + _ = x[SEV-93] + _ = x[SEV_64BIT-94] + _ = x[SEV_ALTERNATIVE-95] + _ = x[SEV_DEBUGSWAP-96] + _ = x[SEV_ES-97] + _ = x[SEV_RESTRICTED-98] + _ = x[SEV_SNP-99] + _ = x[SGX-100] + _ = x[SGXLC-101] + _ = x[SHA-102] + _ = x[SME-103] + _ = x[SME_COHERENT-104] + _ = x[SSE-105] + _ = x[SSE2-106] + _ = x[SSE3-107] + _ = x[SSE4-108] + _ = x[SSE42-109] + _ = x[SSE4A-110] + _ = x[SSSE3-111] + _ = x[STIBP-112] + _ = x[STOSB_SHORT-113] + _ = x[SUCCOR-114] + _ = x[SVM-115] + _ = x[SVMDA-116] + _ = x[SVMFBASID-117] + _ = x[SVML-118] + _ = x[SVMNP-119] + _ = x[SVMPF-120] + _ = x[SVMPFT-121] + _ = x[SYSCALL-122] + _ = x[SYSEE-123] + _ = x[TBM-124] + _ = x[TOPEXT-125] + _ = x[TME-126] + _ = x[TSCRATEMSR-127] + _ = x[TSXLDTRK-128] + _ = x[VAES-129] + _ = x[VMCBCLEAN-130] + _ = x[VMPL-131] + _ = x[VMSA_REGPROT-132] + _ = x[VMX-133] + _ = x[VPCLMULQDQ-134] + _ = x[VTE-135] + _ = x[WAITPKG-136] + _ = x[WBNOINVD-137] + _ = x[X87-138] + _ = x[XGETBV1-139] + _ = x[XOP-140] + _ = x[XSAVE-141] + _ = x[XSAVEC-142] + _ = x[XSAVEOPT-143] + _ = x[XSAVES-144] + _ = x[AESARM-145] + _ = x[ARMCPUID-146] + _ = x[ASIMD-147] + _ = x[ASIMDDP-148] + _ = x[ASIMDHP-149] + _ = x[ASIMDRDM-150] + _ = x[ATOMICS-151] + _ = x[CRC32-152] + _ = x[DCPOP-153] + _ = x[EVTSTRM-154] + _ = x[FCMA-155] + _ = x[FP-156] + _ = x[FPHP-157] + _ = x[GPA-158] + _ = x[JSCVT-159] + _ = x[LRCPC-160] + _ = x[PMULL-161] + _ = x[SHA1-162] + _ = x[SHA2-163] + _ = x[SHA3-164] + _ = x[SHA512-165] + _ = x[SM3-166] + _ = x[SM4-167] + _ = x[SVE-168] + _ = x[lastID-169] + _ = x[firstID-0] +} + +const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXSLOWAVXVNNIBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCX16ENQCMDERMSF16CFMA3FMA4FXSRFXSROPTGFNIHLEHRESETHTTHWAHYPERVISORIBPBIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_PREVENTHOSTINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCOMMITMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTOPEXTTMETSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" + +var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 58, 62, 72, 84, 92, 100, 108, 116, 123, 133, 143, 151, 161, 172, 180, 190, 208, 223, 230, 237, 241, 245, 251, 256, 264, 269, 275, 279, 297, 305, 312, 316, 322, 326, 330, 334, 338, 342, 349, 353, 356, 362, 365, 368, 378, 382, 385, 395, 406, 412, 420, 431, 439, 451, 467, 482, 492, 499, 503, 506, 513, 518, 529, 536, 539, 545, 550, 559, 566, 574, 577, 583, 596, 601, 603, 610, 617, 623, 628, 634, 640, 646, 649, 665, 674, 677, 686, 701, 714, 720, 734, 741, 744, 749, 752, 755, 767, 770, 774, 778, 782, 787, 792, 797, 802, 813, 819, 822, 827, 836, 840, 845, 850, 856, 863, 868, 871, 877, 880, 890, 898, 902, 911, 915, 927, 930, 940, 943, 950, 958, 961, 968, 971, 976, 982, 990, 996, 1002, 1010, 1015, 1022, 1029, 1037, 1044, 1049, 1054, 1061, 1065, 1067, 1071, 1074, 1079, 1084, 1089, 1093, 1097, 1101, 1107, 1110, 1113, 1116, 1122} + +func (i FeatureID) String() string { + if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) { + return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]] +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[VendorUnknown-0] + _ = x[Intel-1] + _ = x[AMD-2] + _ = x[VIA-3] + _ = x[Transmeta-4] + _ = x[NSC-5] + _ = x[KVM-6] + _ = x[MSVM-7] + _ = x[VMware-8] + _ = x[XenHVM-9] + _ = x[Bhyve-10] + _ = x[Hygon-11] + _ = x[SiS-12] + _ = x[RDC-13] + _ = x[Ampere-14] + _ = x[ARM-15] + _ = x[Broadcom-16] + _ = x[Cavium-17] + _ = x[DEC-18] + _ = x[Fujitsu-19] + _ = x[Infineon-20] + _ = x[Motorola-21] + _ = x[NVIDIA-22] + _ = x[AMCC-23] + _ = x[Qualcomm-24] + _ = x[Marvell-25] + _ = x[lastVendor-26] +} + +const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor" + +var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155} + +func (i Vendor) String() string { + if i < 0 || i >= Vendor(len(_Vendor_index)-1) { + return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]] +} diff --git a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go new file mode 100644 index 000000000..d91d02109 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go @@ -0,0 +1,121 @@ +// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file. + +package cpuid + +import ( + "runtime" + "strings" + + "golang.org/x/sys/unix" +) + +func detectOS(c *CPUInfo) bool { + if runtime.GOOS != "ios" { + tryToFillCPUInfoFomSysctl(c) + } + // There are no hw.optional sysctl values for the below features on Mac OS 11.0 + // to detect their supported state dynamically. Assume the CPU features that + // Apple Silicon M1 supports to be available as a minimal set of features + // to all Go programs running on darwin/arm64. + // TODO: Add more if we know them. + c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2) + + return true +} + +func sysctlGetBool(name string) bool { + value, err := unix.SysctlUint32(name) + if err != nil { + return false + } + return value != 0 +} + +func sysctlGetString(name string) string { + value, err := unix.Sysctl(name) + if err != nil { + return "" + } + return value +} + +func sysctlGetInt(unknown int, names ...string) int { + for _, name := range names { + value, err := unix.SysctlUint32(name) + if err != nil { + continue + } + if value != 0 { + return int(value) + } + } + return unknown +} + +func sysctlGetInt64(unknown int, names ...string) int { + for _, name := range names { + value64, err := unix.SysctlUint64(name) + if err != nil { + continue + } + if int(value64) != unknown { + return int(value64) + } + } + return unknown +} + +func setFeature(c *CPUInfo, name string, feature FeatureID) { + c.featureSet.setIf(sysctlGetBool(name), feature) +} +func tryToFillCPUInfoFomSysctl(c *CPUInfo) { + c.BrandName = sysctlGetString("machdep.cpu.brand_string") + + if len(c.BrandName) != 0 { + c.VendorString = strings.Fields(c.BrandName)[0] + } + + c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu") + c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") / + sysctlGetInt(1, "hw.physicalcpu") + c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count") + c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily") + c.Model = sysctlGetInt(0, "machdep.cpu.model") + c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize") + c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize") + c.Cache.L1D = sysctlGetInt64(-1, "hw.l1icachesize") + c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize") + c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize") + + // from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile + setFeature(c, "hw.optional.arm.FEAT_AES", AESARM) + setFeature(c, "hw.optional.AdvSIMD", ASIMD) + setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP) + setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM) + setFeature(c, "hw.optional.FEAT_CRC32", CRC32) + setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP) + // setFeature(c, "", EVTSTRM) + setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA) + setFeature(c, "hw.optional.arm.FEAT_FP", FP) + setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP) + setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA) + setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT) + setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC) + setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL) + setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1) + setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2) + setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3) + setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512) + // setFeature(c, "", SM3) + // setFeature(c, "", SM4) + setFeature(c, "hw.optional.arm.FEAT_SVE", SVE) + + // from empirical observation + setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP) + setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS) + setFeature(c, "hw.optional.floatingpoint", FP) + setFeature(c, "hw.optional.armv8_2_sha3", SHA3) + setFeature(c, "hw.optional.armv8_2_sha512", SHA512) + setFeature(c, "hw.optional.armv8_3_compnum", FCMA) + setFeature(c, "hw.optional.armv8_crc32", CRC32) +} diff --git a/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go new file mode 100644 index 000000000..ee278b9e4 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go @@ -0,0 +1,130 @@ +// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file. + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file located +// here https://github.com/golang/sys/blob/master/LICENSE + +package cpuid + +import ( + "encoding/binary" + "io/ioutil" + "runtime" +) + +// HWCAP bits. +const ( + hwcap_FP = 1 << 0 + hwcap_ASIMD = 1 << 1 + hwcap_EVTSTRM = 1 << 2 + hwcap_AES = 1 << 3 + hwcap_PMULL = 1 << 4 + hwcap_SHA1 = 1 << 5 + hwcap_SHA2 = 1 << 6 + hwcap_CRC32 = 1 << 7 + hwcap_ATOMICS = 1 << 8 + hwcap_FPHP = 1 << 9 + hwcap_ASIMDHP = 1 << 10 + hwcap_CPUID = 1 << 11 + hwcap_ASIMDRDM = 1 << 12 + hwcap_JSCVT = 1 << 13 + hwcap_FCMA = 1 << 14 + hwcap_LRCPC = 1 << 15 + hwcap_DCPOP = 1 << 16 + hwcap_SHA3 = 1 << 17 + hwcap_SM3 = 1 << 18 + hwcap_SM4 = 1 << 19 + hwcap_ASIMDDP = 1 << 20 + hwcap_SHA512 = 1 << 21 + hwcap_SVE = 1 << 22 + hwcap_ASIMDFHM = 1 << 23 +) + +func detectOS(c *CPUInfo) bool { + // For now assuming no hyperthreading is reasonable. + c.LogicalCores = runtime.NumCPU() + c.PhysicalCores = c.LogicalCores + c.ThreadsPerCore = 1 + if hwcap == 0 { + // We did not get values from the runtime. + // Try reading /proc/self/auxv + + // From https://github.com/golang/sys + const ( + _AT_HWCAP = 16 + _AT_HWCAP2 = 26 + + uintSize = int(32 << (^uint(0) >> 63)) + ) + + buf, err := ioutil.ReadFile("/proc/self/auxv") + if err != nil { + // e.g. on android /proc/self/auxv is not accessible, so silently + // ignore the error and leave Initialized = false. On some + // architectures (e.g. arm64) doinit() implements a fallback + // readout and will set Initialized = true again. + return false + } + bo := binary.LittleEndian + for len(buf) >= 2*(uintSize/8) { + var tag, val uint + switch uintSize { + case 32: + tag = uint(bo.Uint32(buf[0:])) + val = uint(bo.Uint32(buf[4:])) + buf = buf[8:] + case 64: + tag = uint(bo.Uint64(buf[0:])) + val = uint(bo.Uint64(buf[8:])) + buf = buf[16:] + } + switch tag { + case _AT_HWCAP: + hwcap = val + case _AT_HWCAP2: + // Not used + } + } + if hwcap == 0 { + return false + } + } + + // HWCap was populated by the runtime from the auxiliary vector. + // Use HWCap information since reading aarch64 system registers + // is not supported in user space on older linux kernels. + c.featureSet.setIf(isSet(hwcap, hwcap_AES), AESARM) + c.featureSet.setIf(isSet(hwcap, hwcap_ASIMD), ASIMD) + c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDDP), ASIMDDP) + c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDHP), ASIMDHP) + c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDRDM), ASIMDRDM) + c.featureSet.setIf(isSet(hwcap, hwcap_CPUID), ARMCPUID) + c.featureSet.setIf(isSet(hwcap, hwcap_CRC32), CRC32) + c.featureSet.setIf(isSet(hwcap, hwcap_DCPOP), DCPOP) + c.featureSet.setIf(isSet(hwcap, hwcap_EVTSTRM), EVTSTRM) + c.featureSet.setIf(isSet(hwcap, hwcap_FCMA), FCMA) + c.featureSet.setIf(isSet(hwcap, hwcap_FP), FP) + c.featureSet.setIf(isSet(hwcap, hwcap_FPHP), FPHP) + c.featureSet.setIf(isSet(hwcap, hwcap_JSCVT), JSCVT) + c.featureSet.setIf(isSet(hwcap, hwcap_LRCPC), LRCPC) + c.featureSet.setIf(isSet(hwcap, hwcap_PMULL), PMULL) + c.featureSet.setIf(isSet(hwcap, hwcap_SHA1), SHA1) + c.featureSet.setIf(isSet(hwcap, hwcap_SHA2), SHA2) + c.featureSet.setIf(isSet(hwcap, hwcap_SHA3), SHA3) + c.featureSet.setIf(isSet(hwcap, hwcap_SHA512), SHA512) + c.featureSet.setIf(isSet(hwcap, hwcap_SM3), SM3) + c.featureSet.setIf(isSet(hwcap, hwcap_SM4), SM4) + c.featureSet.setIf(isSet(hwcap, hwcap_SVE), SVE) + + // The Samsung S9+ kernel reports support for atomics, but not all cores + // actually support them, resulting in SIGILL. See issue #28431. + // TODO(elias.naur): Only disable the optimization on bad chipsets on android. + c.featureSet.setIf(isSet(hwcap, hwcap_ATOMICS) && runtime.GOOS != "android", ATOMICS) + + return true +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} diff --git a/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go new file mode 100644 index 000000000..8733ba343 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go @@ -0,0 +1,16 @@ +// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file. + +//go:build arm64 && !linux && !darwin +// +build arm64,!linux,!darwin + +package cpuid + +import "runtime" + +func detectOS(c *CPUInfo) bool { + c.PhysicalCores = runtime.NumCPU() + // For now assuming 1 thread per core... + c.ThreadsPerCore = 1 + c.LogicalCores = c.PhysicalCores + return false +} diff --git a/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go new file mode 100644 index 000000000..f8f201b5f --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go @@ -0,0 +1,8 @@ +// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file. + +//go:build nounsafe +// +build nounsafe + +package cpuid + +var hwcap uint diff --git a/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go new file mode 100644 index 000000000..92af622eb --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go @@ -0,0 +1,11 @@ +// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file. + +//go:build !nounsafe +// +build !nounsafe + +package cpuid + +import _ "unsafe" // needed for go:linkname + +//go:linkname hwcap internal/cpu.HWCap +var hwcap uint diff --git a/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh new file mode 100644 index 000000000..471d986d2 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +set -e + +go tool dist list | while IFS=/ read os arch; do + echo "Checking $os/$arch..." + echo " normal" + GOARCH=$arch GOOS=$os go build -o /dev/null . + echo " noasm" + GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null . + echo " appengine" + GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null . + echo " noasm,appengine" + GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null . +done |