summaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost/cpuid/v2
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/cpuid/v2')
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/.gitignore24
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml74
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt35
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/LICENSE22
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/README.md258
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/cpuid.go1291
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/cpuid_386.s47
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s72
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s26
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/detect_arm64.go247
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/detect_ref.go15
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/detect_x86.go36
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/featureid_string.go235
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go121
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go130
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go16
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go8
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go11
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/test-architectures.sh15
19 files changed, 2683 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/.gitignore b/vendor/github.com/klauspost/cpuid/v2/.gitignore
new file mode 100644
index 000000000..daf913b1b
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/.gitignore
@@ -0,0 +1,24 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
diff --git a/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml b/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
new file mode 100644
index 000000000..944cc0007
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
@@ -0,0 +1,74 @@
+# This is an example goreleaser.yaml file with some sane defaults.
+# Make sure to check the documentation at http://goreleaser.com
+
+builds:
+ -
+ id: "cpuid"
+ binary: cpuid
+ main: ./cmd/cpuid/main.go
+ env:
+ - CGO_ENABLED=0
+ flags:
+ - -ldflags=-s -w
+ goos:
+ - aix
+ - linux
+ - freebsd
+ - netbsd
+ - windows
+ - darwin
+ goarch:
+ - 386
+ - amd64
+ - arm64
+ goarm:
+ - 7
+
+archives:
+ -
+ id: cpuid
+ name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
+ replacements:
+ aix: AIX
+ darwin: OSX
+ linux: Linux
+ windows: Windows
+ 386: i386
+ amd64: x86_64
+ freebsd: FreeBSD
+ netbsd: NetBSD
+ format_overrides:
+ - goos: windows
+ format: zip
+ files:
+ - LICENSE
+checksum:
+ name_template: 'checksums.txt'
+snapshot:
+ name_template: "{{ .Tag }}-next"
+changelog:
+ sort: asc
+ filters:
+ exclude:
+ - '^doc:'
+ - '^docs:'
+ - '^test:'
+ - '^tests:'
+ - '^Update\sREADME.md'
+
+nfpms:
+ -
+ file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
+ vendor: Klaus Post
+ homepage: https://github.com/klauspost/cpuid
+ maintainer: Klaus Post <klauspost@gmail.com>
+ description: CPUID Tool
+ license: MIT
+ formats:
+ - deb
+ - rpm
+ replacements:
+ darwin: Darwin
+ linux: Linux
+ freebsd: FreeBSD
+ amd64: x86_64
diff --git a/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt
new file mode 100644
index 000000000..2ef4714f7
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt
@@ -0,0 +1,35 @@
+Developer Certificate of Origin
+Version 1.1
+
+Copyright (C) 2015- Klaus Post & Contributors.
+Email: klauspost@gmail.com
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+
+Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+(c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+(d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
diff --git a/vendor/github.com/klauspost/cpuid/v2/LICENSE b/vendor/github.com/klauspost/cpuid/v2/LICENSE
new file mode 100644
index 000000000..5cec7ee94
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/LICENSE
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Klaus Post
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md
new file mode 100644
index 000000000..ea7df3dd8
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/README.md
@@ -0,0 +1,258 @@
+# cpuid
+Package cpuid provides information about the CPU running the current program.
+
+CPU features are detected on startup, and kept for fast access through the life of the application.
+Currently x86 / x64 (AMD64/i386) and ARM (ARM64) are supported, and no external C (cgo) code is used, which should make the library very easy to use.
+
+You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+
+Package home: https://github.com/klauspost/cpuid
+
+[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
+[![Build Status][3]][4]
+
+[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
+[4]: https://travis-ci.org/klauspost/cpuid
+
+## installing
+
+`go get -u github.com/klauspost/cpuid/v2` using modules.
+
+Drop `v2` for others.
+
+## example
+
+```Go
+package main
+
+import (
+ "fmt"
+ "strings"
+
+ . "github.com/klauspost/cpuid/v2"
+)
+
+func main() {
+ // Print basic CPU information:
+ fmt.Println("Name:", CPU.BrandName)
+ fmt.Println("PhysicalCores:", CPU.PhysicalCores)
+ fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
+ fmt.Println("LogicalCores:", CPU.LogicalCores)
+ fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID)
+ fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ","))
+ fmt.Println("Cacheline bytes:", CPU.CacheLine)
+ fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes")
+ fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
+ fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes")
+ fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes")
+ fmt.Println("Frequency", CPU.Hz, "hz")
+
+ // Test if we have these specific features:
+ if CPU.Supports(SSE, SSE2) {
+ fmt.Println("We have Streaming SIMD 2 Extensions")
+ }
+}
+```
+
+Sample output:
+```
+>go run main.go
+Name: AMD Ryzen 9 3950X 16-Core Processor
+PhysicalCores: 16
+ThreadsPerCore: 2
+LogicalCores: 32
+Family 23 Model: 113 Vendor ID: AMD
+Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3
+Cacheline bytes: 64
+L1 Data Cache: 32768 bytes
+L1 Instruction Cache: 32768 bytes
+L2 Cache: 524288 bytes
+L3 Cache: 16777216 bytes
+Frequency 0 hz
+We have Streaming SIMD 2 Extensions
+```
+
+# usage
+
+The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
+A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.
+
+Note that for some cpu/os combinations some features will not be detected.
+`amd64` has rather good support and should work reliably on all platforms.
+
+Note that hypervisors may not pass through all CPU features.
+
+## arm64 feature detection
+
+Not all operating systems provide ARM features directly
+and there is no safe way to do so for the rest.
+
+Currently `arm64/linux` and `arm64/freebsd` should be quite reliable.
+`arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected.
+
+A `DetectARM()` can be used if you are able to control your deployment,
+it will detect CPU features, but may crash if the OS doesn't intercept the calls.
+A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below.
+
+Note that currently only features are detected on ARM,
+no additional information is currently available.
+
+## flags
+
+It is possible to add flags that affect cpu detection.
+
+For this the `Flags()` command is provided.
+
+This must be called *before* `flag.Parse()`, AND `Detect()` must be called after the flags have been parsed.
+
+This means that any detection used in `init()` functions will not contain these flags.
+
+Example:
+
+```Go
+package main
+
+import (
+ "flag"
+ "fmt"
+ "strings"
+
+ "github.com/klauspost/cpuid/v2"
+)
+
+func main() {
+ cpuid.Flags()
+ flag.Parse()
+ cpuid.Detect()
+
+ // Test if we have these specific features:
+ if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) {
+ fmt.Println("We have Streaming SIMD 2 Extensions")
+ }
+}
+```
+
+## commandline
+
+Download as binary from: https://github.com/klauspost/cpuid/releases
+
+Install from source:
+
+`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
+
+### Example
+
+```
+λ cpuid
+Name: AMD Ryzen 9 3950X 16-Core Processor
+Vendor String: AuthenticAMD
+Vendor ID: AMD
+PhysicalCores: 16
+Threads Per Core: 2
+Logical Cores: 32
+CPU Family 23 Model: 113
+Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE
+Microarchitecture level: 3
+Cacheline bytes: 64
+L1 Instruction Cache: 32768 bytes
+L1 Data Cache: 32768 bytes
+L2 Cache: 524288 bytes
+L3 Cache: 16777216 bytes
+
+```
+### JSON Output:
+
+```
+λ cpuid --json
+{
+ "BrandName": "AMD Ryzen 9 3950X 16-Core Processor",
+ "VendorID": 2,
+ "VendorString": "AuthenticAMD",
+ "PhysicalCores": 16,
+ "ThreadsPerCore": 2,
+ "LogicalCores": 32,
+ "Family": 23,
+ "Model": 113,
+ "CacheLine": 64,
+ "Hz": 0,
+ "BoostFreq": 0,
+ "Cache": {
+ "L1I": 32768,
+ "L1D": 32768,
+ "L2": 524288,
+ "L3": 16777216
+ },
+ "SGX": {
+ "Available": false,
+ "LaunchControl": false,
+ "SGX1Supported": false,
+ "SGX2Supported": false,
+ "MaxEnclaveSizeNot64": 0,
+ "MaxEnclaveSize64": 0,
+ "EPCSections": null
+ },
+ "Features": [
+ "ADX",
+ "AESNI",
+ "AVX",
+ "AVX2",
+ "BMI1",
+ "BMI2",
+ "CLMUL",
+ "CLZERO",
+ "CMOV",
+ "CMPXCHG8",
+ "CPBOOST",
+ "CX16",
+ "F16C",
+ "FMA3",
+ "FXSR",
+ "FXSROPT",
+ "HTT",
+ "HYPERVISOR",
+ "LAHF",
+ "LZCNT",
+ "MCAOVERFLOW",
+ "MMX",
+ "MMXEXT",
+ "MOVBE",
+ "NX",
+ "OSXSAVE",
+ "POPCNT",
+ "RDRAND",
+ "RDSEED",
+ "RDTSCP",
+ "SCE",
+ "SHA",
+ "SSE",
+ "SSE2",
+ "SSE3",
+ "SSE4",
+ "SSE42",
+ "SSE4A",
+ "SSSE3",
+ "SUCCOR",
+ "X87",
+ "XSAVE"
+ ],
+ "X64Level": 3
+}
+```
+
+### Check CPU microarch level
+
+```
+λ cpuid --check-level=3
+2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor
+2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3.
+Exit Code 0
+
+λ cpuid --check-level=4
+2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor
+2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3.
+Exit Code 1
+```
+
+# license
+
+This code is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
new file mode 100644
index 000000000..27f33250e
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@@ -0,0 +1,1291 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// Package cpuid provides information about the CPU running the current program.
+//
+// CPU features are detected on startup, and kept for fast access through the life of the application.
+// Currently x86 / x64 (AMD64) as well as arm64 is supported.
+//
+// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+//
+// Package home: https://github.com/klauspost/cpuid
+package cpuid
+
+import (
+ "flag"
+ "fmt"
+ "math"
+ "math/bits"
+ "os"
+ "runtime"
+ "strings"
+)
+
+// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
+// and Processor Programming Reference (PPR)
+
+// Vendor is a representation of a CPU vendor.
+type Vendor int
+
+const (
+ VendorUnknown Vendor = iota
+ Intel
+ AMD
+ VIA
+ Transmeta
+ NSC
+ KVM // Kernel-based Virtual Machine
+ MSVM // Microsoft Hyper-V or Windows Virtual PC
+ VMware
+ XenHVM
+ Bhyve
+ Hygon
+ SiS
+ RDC
+
+ Ampere
+ ARM
+ Broadcom
+ Cavium
+ DEC
+ Fujitsu
+ Infineon
+ Motorola
+ NVIDIA
+ AMCC
+ Qualcomm
+ Marvell
+
+ lastVendor
+)
+
+//go:generate stringer -type=FeatureID,Vendor
+
+// FeatureID is the ID of a specific cpu feature.
+type FeatureID int
+
+const (
+ // Keep index -1 as unknown
+ UNKNOWN = -1
+
+ // Add features
+ ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+ AESNI // Advanced Encryption Standard New Instructions
+ AMD3DNOW // AMD 3DNOW
+ AMD3DNOWEXT // AMD 3DNowExt
+ AMXBF16 // Tile computational operations on BFLOAT16 numbers
+ AMXINT8 // Tile computational operations on 8-bit integers
+ AMXTILE // Tile architecture
+ AVX // AVX functions
+ AVX2 // AVX2 functions
+ AVX512BF16 // AVX-512 BFLOAT16 Instructions
+ AVX512BITALG // AVX-512 Bit Algorithms
+ AVX512BW // AVX-512 Byte and Word Instructions
+ AVX512CD // AVX-512 Conflict Detection Instructions
+ AVX512DQ // AVX-512 Doubleword and Quadword Instructions
+ AVX512ER // AVX-512 Exponential and Reciprocal Instructions
+ AVX512F // AVX-512 Foundation
+ AVX512FP16 // AVX-512 FP16 Instructions
+ AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
+ AVX512PF // AVX-512 Prefetch Instructions
+ AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
+ AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
+ AVX512VL // AVX-512 Vector Length Extensions
+ AVX512VNNI // AVX-512 Vector Neural Network Instructions
+ AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
+ AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
+ AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
+ AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
+ BMI1 // Bit Manipulation Instruction Set 1
+ BMI2 // Bit Manipulation Instruction Set 2
+ CETIBT // Intel CET Indirect Branch Tracking
+ CETSS // Intel CET Shadow Stack
+ CLDEMOTE // Cache Line Demote
+ CLMUL // Carry-less Multiplication
+ CLZERO // CLZERO instruction supported
+ CMOV // i686 CMOV
+ CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
+ CMPXCHG8 // CMPXCHG8 instruction
+ CPBOOST // Core Performance Boost
+ CX16 // CMPXCHG16B Instruction
+ ENQCMD // Enqueue Command
+ ERMS // Enhanced REP MOVSB/STOSB
+ F16C // Half-precision floating-point conversion
+ FMA3 // Intel FMA 3. Does not imply AVX.
+ FMA4 // Bulldozer FMA4 functions
+ FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
+ FXSROPT // FXSAVE/FXRSTOR optimizations
+ GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
+ HLE // Hardware Lock Elision
+ HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
+ HTT // Hyperthreading (enabled)
+ HWA // Hardware assert supported. Indicates support for MSRC001_10
+ HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
+ IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+ IBS // Instruction Based Sampling (AMD)
+ IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
+ IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
+ IBSFFV // Instruction Based Sampling Feature (AMD)
+ IBSOPCNT // Instruction Based Sampling Feature (AMD)
+ IBSOPCNTEXT // Instruction Based Sampling Feature (AMD)
+ IBSOPSAM // Instruction Based Sampling Feature (AMD)
+ IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
+ IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
+ IBS_PREVENTHOST // Disallowing IBS use by the host supported
+ INT_WBINVD // WBINVD/WBNOINVD are interruptible.
+ INVLPGB // INVLPGB and TLBSYNC instructions supported
+ LAHF // LAHF/SAHF in long mode
+ LAM // If set, CPU supports Linear Address Masking
+ LBRVIRT // LBR virtualization
+ LZCNT // LZCNT instruction
+ MCAOVERFLOW // MCA overflow recovery support.
+ MCOMMIT // MCOMMIT instruction supported
+ MMX // standard MMX
+ MMXEXT // SSE integer functions or AMD MMX ext
+ MOVBE // MOVBE instruction (big-endian)
+ MOVDIR64B // Move 64 Bytes as Direct Store
+ MOVDIRI // Move Doubleword as Direct Store
+ MOVSB_ZL // Fast Zero-Length MOVSB
+ MPX // Intel MPX (Memory Protection Extensions)
+ MSRIRC // Instruction Retired Counter MSR available
+ MSR_PAGEFLUSH // Page Flush MSR available
+ NRIPS // Indicates support for NRIP save on VMEXIT
+ NX // NX (No-Execute) bit
+ OSXSAVE // XSAVE enabled by OS
+ PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
+ POPCNT // POPCNT instruction
+ RDPRU // RDPRU instruction supported
+ RDRAND // RDRAND instruction is available
+ RDSEED // RDSEED instruction is available
+ RDTSCP // RDTSCP Instruction
+ RTM // Restricted Transactional Memory
+ RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
+ SERIALIZE // Serialize Instruction Execution
+ SEV // AMD Secure Encrypted Virtualization supported
+ SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
+ SEV_ALTERNATIVE // AMD SEV Alternate Injection supported
+ SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests
+ SEV_ES // AMD SEV Encrypted State supported
+ SEV_RESTRICTED // AMD SEV Restricted Injection supported
+ SEV_SNP // AMD SEV Secure Nested Paging supported
+ SGX // Software Guard Extensions
+ SGXLC // Software Guard Extensions Launch Control
+ SHA // Intel SHA Extensions
+ SME // AMD Secure Memory Encryption supported
+ SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
+ SSE // SSE functions
+ SSE2 // P4 SSE functions
+ SSE3 // Prescott SSE3 functions
+ SSE4 // Penryn SSE4.1 functions
+ SSE42 // Nehalem SSE4.2 functions
+ SSE4A // AMD Barcelona microarchitecture SSE4a instructions
+ SSSE3 // Conroe SSSE3 functions
+ STIBP // Single Thread Indirect Branch Predictors
+ STOSB_SHORT // Fast short STOSB
+ SUCCOR // Software uncorrectable error containment and recovery capability.
+ SVM // AMD Secure Virtual Machine
+ SVMDA // Indicates support for the SVM decode assists.
+ SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
+ SVML // AMD SVM lock. Indicates support for SVM-Lock.
+ SVMNP // AMD SVM nested paging
+ SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter
+ SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
+ SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
+ SYSEE // SYSENTER and SYSEXIT instructions
+ TBM // AMD Trailing Bit Manipulation
+ TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
+ TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
+ TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
+ TSXLDTRK // Intel TSX Suspend Load Address Tracking
+ VAES // Vector AES. AVX(512) versions requires additional checks.
+ VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits.
+ VMPL // AMD VM Permission Levels supported
+ VMSA_REGPROT // AMD VMSA Register Protection supported
+ VMX // Virtual Machine Extensions
+ VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
+ VTE // AMD Virtual Transparent Encryption supported
+ WAITPKG // TPAUSE, UMONITOR, UMWAIT
+ WBNOINVD // Write Back and Do Not Invalidate Cache
+ X87 // FPU
+ XGETBV1 // Supports XGETBV with ECX = 1
+ XOP // Bulldozer XOP functions
+ XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV
+ XSAVEC // Supports XSAVEC and the compacted form of XRSTOR.
+ XSAVEOPT // XSAVEOPT available
+ XSAVES // Supports XSAVES/XRSTORS and IA32_XSS
+
+ // ARM features:
+ AESARM // AES instructions
+ ARMCPUID // Some CPU ID registers readable at user-level
+ ASIMD // Advanced SIMD
+ ASIMDDP // SIMD Dot Product
+ ASIMDHP // Advanced SIMD half-precision floating point
+ ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
+ ATOMICS // Large System Extensions (LSE)
+ CRC32 // CRC32/CRC32C instructions
+ DCPOP // Data cache clean to Point of Persistence (DC CVAP)
+ EVTSTRM // Generic timer
+ FCMA // Floating point complex number addition and multiplication
+ FP // Single-precision and double-precision floating point
+ FPHP // Half-precision floating point
+ GPA // Generic Pointer Authentication
+ JSCVT // Javascript-style double->int convert (FJCVTZS)
+ LRCPC // Weaker release consistency (LDAPR, etc)
+ PMULL // Polynomial Multiply instructions (PMULL/PMULL2)
+ SHA1 // SHA-1 instructions (SHA1C, etc)
+ SHA2 // SHA-2 instructions (SHA256H, etc)
+ SHA3 // SHA-3 instructions (EOR3, RAX1, XAR, BCAX)
+ SHA512 // SHA512 instructions
+ SM3 // SM3 instructions
+ SM4 // SM4 instructions
+ SVE // Scalable Vector Extension
+ // Keep it last. It automatically defines the size of []flagSet
+ lastID
+
+ firstID FeatureID = UNKNOWN + 1
+)
+
+// CPUInfo contains information about the detected system CPU.
+type CPUInfo struct {
+ BrandName string // Brand name reported by the CPU
+ VendorID Vendor // Comparable CPU vendor ID
+ VendorString string // Raw vendor string.
+ featureSet flagSet // Features of the CPU
+ PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
+ ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
+ LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
+ Family int // CPU family number
+ Model int // CPU model number
+ Stepping int // CPU stepping info
+ CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
+ Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
+ BoostFreq int64 // Max clock speed, if known, 0 otherwise
+ Cache struct {
+ L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
+ L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
+ L2 int // L2 Cache (per core or shared). Will be -1 if undetected
+ L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
+ }
+ SGX SGXSupport
+ maxFunc uint32
+ maxExFunc uint32
+}
+
+var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
+var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+var xgetbv func(index uint32) (eax, edx uint32)
+var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
+var darwinHasAVX512 = func() bool { return false }
+
+// CPU contains information about the CPU as detected on startup,
+// or when Detect last was called.
+//
+// Use this as the primary entry point to your data.
+var CPU CPUInfo
+
+// init runs at package load. It calls initCPU (defined elsewhere in the
+// package; presumably it installs the platform-specific cpuid/xgetbv hooks —
+// confirm against the per-architecture files) and then runs Detect once so
+// the exported CPU variable is filled in.
+func init() {
+ initCPU()
+ Detect()
+}
+
+// Detect refreshes the exported CPU variable by running CPU detection again.
+//
+// Calling it is only required when the CPU may have changed while the
+// program is running, or after flag.Parse when Flags has been used.
+// The caller must guarantee that no other goroutine accesses the exported
+// CPU variable while Detect runs.
+func Detect() {
+	// Defaults for values that may remain undetected.
+	CPU.ThreadsPerCore = 1
+	CPU.Cache.L1I, CPU.Cache.L1D, CPU.Cache.L2, CPU.Cache.L3 = -1, -1, -1, -1
+
+	// Detection stays "safe" unless the cpu.arm flag was registered and set.
+	safe := detectArmFlag == nil || !*detectArmFlag
+	addInfo(&CPU, safe)
+
+	if displayFeats != nil && *displayFeats {
+		fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
+		// Exit with non-zero so tests will print value.
+		os.Exit(1)
+	}
+
+	if disableFlag == nil {
+		return
+	}
+	// Clear every recognised feature named in the comma separated list;
+	// names that ParseFeature does not know are ignored.
+	for _, name := range strings.Split(*disableFlag, ",") {
+		if id := ParseFeature(strings.TrimSpace(name)); id != UNKNOWN {
+			CPU.featureSet.unset(id)
+		}
+	}
+}
+
+// DetectARM will detect ARM64 features.
+// This is NOT done automatically since it can potentially crash
+// if the OS does not handle the command.
+// If in the future this can be done safely this function may not
+// do anything.
+//
+// It runs addInfo on the exported CPU variable with the safe argument set to
+// false, so the same concurrency caveat as for Detect applies.
+func DetectARM() {
+ addInfo(&CPU, false)
+}
+
+// Values of the optional command line flags.
+// All three stay nil until Flags has been called.
+var (
+	detectArmFlag *bool
+	displayFeats  *bool
+	disableFlag   *string
+)
+
+// Flags registers the cpu.disable, cpu.features and cpu.arm flags on the
+// default command line flag set.
+//
+// Call it *before* flag.Parse, and call Detect once the flags have been
+// parsed. Detection performed from init() functions therefore never sees
+// the values of these flags.
+func Flags() {
+	disableFlag = new(string)
+	flag.StringVar(disableFlag, "cpu.disable", "", "disable cpu features; comma separated list")
+
+	displayFeats = new(bool)
+	flag.BoolVar(displayFeats, "cpu.features", false, "lists cpu features and exits")
+
+	detectArmFlag = new(bool)
+	flag.BoolVar(detectArmFlag, "cpu.arm", false, "allow ARM features to be detected; can potentially crash")
+}
+
+// Supports reports whether every one of the requested features is present.
+// With no ids it returns true.
+func (c CPUInfo) Supports(ids ...FeatureID) bool {
+	for i := range ids {
+		if c.featureSet.inSet(ids[i]) {
+			continue
+		}
+		// First missing feature decides the result.
+		return false
+	}
+	return true
+}
+
+// Has allows for checking a single feature.
+// It returns the same result as Supports called with that one id.
+// Should be inlined by the compiler.
+func (c CPUInfo) Has(id FeatureID) bool {
+ return c.featureSet.inSet(id)
+}
+
+// AnyOf reports whether at least one of the requested features is present.
+// With no ids it returns false.
+func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
+	found := false
+	for i := 0; i < len(ids) && !found; i++ {
+		found = c.featureSet.inSet(ids[i])
+	}
+	return found
+}
+
+// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
+var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
+var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
+var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
+var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
+
+// X64Level returns the highest x86-64 microarchitecture level whose complete
+// feature list is present in the feature set.
+// 0 is returned when not even the level 1 features are all present, which
+// includes non-x64 targets.
+// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
+func (c CPUInfo) X64Level() int {
+	// Checked from the most demanding level downwards.
+	switch {
+	case c.featureSet.hasSet(level4Features):
+		return 4
+	case c.featureSet.hasSet(level3Features):
+		return 3
+	case c.featureSet.hasSet(level2Features):
+		return 2
+	case c.featureSet.hasSet(level1Features):
+		return 1
+	default:
+		return 0
+	}
+}
+
+// Disable clears the given features from the feature set.
+// It always returns true.
+func (c *CPUInfo) Disable(ids ...FeatureID) bool {
+	for i := range ids {
+		c.featureSet.unset(ids[i])
+	}
+	return true
+}
+
+// Enable will enable one or several features even if they were undetected.
+// This is of course not recommended for obvious reasons.
+// It always returns true.
+func (c *CPUInfo) Enable(ids ...FeatureID) bool {
+	for i := range ids {
+		c.featureSet.set(ids[i])
+	}
+	return true
+}
+
+// IsVendor returns true if the detected vendor (VendorID) equals v.
+func (c CPUInfo) IsVendor(v Vendor) bool {
+ return c.VendorID == v
+}
+
+// FeatureSet returns the names of all enabled features.
+// The result is a freshly allocated slice.
+func (c CPUInfo) FeatureSet() []string {
+	names := make([]string, 0, c.featureSet.nEnabled())
+	return append(names, c.featureSet.Strings()...)
+}
+
+// RTCounter returns the 64-bit time-stamp counter read with the RDTSCP
+// instruction.
+// 0 is returned when the feature set does not contain RDTSCP.
+func (c CPUInfo) RTCounter() uint64 {
+	if c.Supports(RDTSCP) {
+		// EAX carries the low 32 bits, EDX the high 32 bits.
+		lo, _, _, hi := rdtscpAsm()
+		return uint64(hi)<<32 | uint64(lo)
+	}
+	return 0
+}
+
+// Ia32TscAux returns the IA32_TSC_AUX value delivered by RDTSCP (the ECX
+// result of the instruction).
+// The content is OS dependent; on Linux it holds information about the
+// cpu/core the code is currently running on.
+// 0 is returned when the feature set does not contain RDTSCP.
+func (c CPUInfo) Ia32TscAux() uint32 {
+	if c.Supports(RDTSCP) {
+		_, _, aux, _ := rdtscpAsm()
+		return aux
+	}
+	return 0
+}
+
+// LogicalCPU returns the logical CPU the code is executing on right now.
+// The value is likely to change whenever the OS re-schedules the running
+// thread onto another CPU.
+// -1 is returned when CPUID leaf 1 is not available (maxFunc < 1).
+func (c CPUInfo) LogicalCPU() int {
+	if c.maxFunc >= 1 {
+		// The id is the top byte of EBX as returned by CPUID leaf 1.
+		_, ebx, _, _ := cpuid(1)
+		return int(ebx >> 24)
+	}
+	return -1
+}
+
+// frequencies tries to compute the clock speed of the CPU and stores the
+// result in c.Hz and c.BoostFreq (both are reset to 0 first).
+// CPUID leaves 0x15 and 0x16 are used when available; only if they yield no
+// base frequency is the brand string parsed instead. Yes, really.
+func (c *CPUInfo) frequencies() {
+ c.Hz, c.BoostFreq = 0, 0
+ mfi := maxFunctionID()
+ if mfi >= 0x15 {
+ eax, ebx, ecx, _ := cpuid(0x15)
+ // All three values must be non-zero; eax is the divisor below.
+ // NOTE(review): per the Intel SDM this is crystal Hz (ecx) times the
+ // TSC/crystal ratio (ebx/eax) — confirm against the CPUID reference.
+ if eax != 0 && ebx != 0 && ecx != 0 {
+ c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
+ }
+ }
+ if mfi >= 0x16 {
+ a, b, _, _ := cpuid(0x16)
+ // Base: low 16 bits of EAX, scaled by 1e6. Overrides the leaf 0x15 value.
+ if a&0xffff > 0 {
+ c.Hz = int64(a&0xffff) * 1_000_000
+ }
+ // Boost: low 16 bits of EBX, scaled by 1e6.
+ if b&0xffff > 0 {
+ c.BoostFreq = int64(b&0xffff) * 1_000_000
+ }
+ }
+ if c.Hz > 0 {
+ return
+ }
+
+ // Fall back to determining the official rated speed of the CPU from its
+ // brand string. This insanity is *actually the official documented way to
+ // do this according to Intel*, prior to leaf 0x15 existing. The official
+ // documentation only shows this working for exactly `x.xx` or `xxxx`
+ // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
+ // sizes.
+ model := c.BrandName
+ hz := strings.LastIndex(model, "Hz")
+ // Also covers "not found" (-1); anything below 3 is treated as unparsable.
+ if hz < 3 {
+ return
+ }
+ // The character directly before "Hz" selects the unit.
+ var multiplier int64
+ switch model[hz-1] {
+ case 'M':
+ multiplier = 1000 * 1000
+ case 'G':
+ multiplier = 1000 * 1000 * 1000
+ case 'T':
+ multiplier = 1000 * 1000 * 1000 * 1000
+ }
+ if multiplier == 0 {
+ return
+ }
+ freq := int64(0)
+ divisor := int64(0)
+ decimalShift := int64(1)
+ var i int
+ // Walk backwards from the unit prefix to the preceding space, building the
+ // number from its least significant digit upwards.
+ for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
+ if model[i] >= '0' && model[i] <= '9' {
+ freq += int64(model[i]-'0') * decimalShift
+ decimalShift *= 10
+ } else if model[i] == '.' {
+ // A second decimal point makes the number invalid.
+ if divisor != 0 {
+ return
+ }
+ // decimalShift now equals 10^(number of fractional digits).
+ divisor = decimalShift
+ } else {
+ return
+ }
+ }
+ // we didn't find a space
+ if i < 0 {
+ return
+ }
+ if divisor != 0 {
+ c.Hz = (freq * multiplier) / divisor
+ return
+ }
+ c.Hz = freq * multiplier
+}
+
+// VM Will return true if the cpu id indicates we are in
+// a virtual machine.
+func (c CPUInfo) VM() bool {
+ return CPU.featureSet.inSet(HYPERVISOR)
+}
+
+// flags contains detected cpu features and characteristics
+type flags uint64
+
+// log2(bits_in_uint64)
+const flagBitsLog2 = 6
+const flagBits = 1 << flagBitsLog2
+const flagMask = flagBits - 1
+
+// flagSet contains detected cpu features and characteristics in an array of flags
+type flagSet [(lastID + flagMask) / flagBits]flags
+
+func (s flagSet) inSet(feat FeatureID) bool {
+ return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
+}
+
+func (s *flagSet) set(feat FeatureID) {
+ s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
+}
+
+// setIf will set a feature if boolean is true.
+func (s *flagSet) setIf(cond bool, features ...FeatureID) {
+ if cond {
+ for _, offset := range features {
+ s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
+ }
+ }
+}
+
+func (s *flagSet) unset(offset FeatureID) {
+ bit := flags(1 << (offset & flagMask))
+ s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit
+}
+
+// or with another flagset.
+func (s *flagSet) or(other flagSet) {
+ for i, v := range other[:] {
+ s[i] |= v
+ }
+}
+
+// hasSet returns whether all features are present.
+func (s flagSet) hasSet(other flagSet) bool {
+ for i, v := range other[:] {
+ if s[i]&v != v {
+ return false
+ }
+ }
+ return true
+}
+
+// nEnabled will return the number of enabled flags.
+func (s flagSet) nEnabled() (n int) {
+ for _, v := range s[:] {
+ n += bits.OnesCount64(uint64(v))
+ }
+ return n
+}
+
+func flagSetWith(feat ...FeatureID) flagSet {
+ var res flagSet
+ for _, f := range feat {
+ res.set(f)
+ }
+ return res
+}
+
+// ParseFeature will parse the string and return the ID of the matching feature.
+// Will return UNKNOWN if not found.
+func ParseFeature(s string) FeatureID {
+ s = strings.ToUpper(s)
+ for i := firstID; i < lastID; i++ {
+ if i.String() == s {
+ return i
+ }
+ }
+ return UNKNOWN
+}
+
+// Strings returns an array of the detected features for FlagsSet.
+func (s flagSet) Strings() []string {
+ if len(s) == 0 {
+ return []string{""}
+ }
+ r := make([]string, 0)
+ for i := firstID; i < lastID; i++ {
+ if s.inSet(i) {
+ r = append(r, i.String())
+ }
+ }
+ return r
+}
+
+func maxExtendedFunction() uint32 {
+ eax, _, _, _ := cpuid(0x80000000)
+ return eax
+}
+
+func maxFunctionID() uint32 {
+ a, _, _, _ := cpuid(0)
+ return a
+}
+
+func brandName() string {
+ if maxExtendedFunction() >= 0x80000004 {
+ v := make([]uint32, 0, 48)
+ for i := uint32(0); i < 3; i++ {
+ a, b, c, d := cpuid(0x80000002 + i)
+ v = append(v, a, b, c, d)
+ }
+ return strings.Trim(string(valAsString(v...)), " ")
+ }
+ return "unknown"
+}
+
+func threadsPerCore() int {
+ mfi := maxFunctionID()
+ vend, _ := vendorID()
+
+ if mfi < 0x4 || (vend != Intel && vend != AMD) {
+ return 1
+ }
+
+ if mfi < 0xb {
+ if vend != Intel {
+ return 1
+ }
+ _, b, _, d := cpuid(1)
+ if (d & (1 << 28)) != 0 {
+ // v will contain logical core count
+ v := (b >> 16) & 255
+ if v > 1 {
+ a4, _, _, _ := cpuid(4)
+ // physical cores
+ v2 := (a4 >> 26) + 1
+ if v2 > 0 {
+ return int(v) / int(v2)
+ }
+ }
+ }
+ return 1
+ }
+ _, b, _, _ := cpuidex(0xb, 0)
+ if b&0xffff == 0 {
+ if vend == AMD {
+ // Workaround for AMD returning 0, assume 2 if >= Zen 2
+ // It will be more correct than not.
+ fam, _, _ := familyModel()
+ _, _, _, d := cpuid(1)
+ if (d&(1<<28)) != 0 && fam >= 23 {
+ return 2
+ }
+ }
+ return 1
+ }
+ return int(b & 0xffff)
+}
+
+func logicalCores() int {
+ mfi := maxFunctionID()
+ v, _ := vendorID()
+ switch v {
+ case Intel:
+ // Use this on old Intel processors
+ if mfi < 0xb {
+ if mfi < 1 {
+ return 0
+ }
+ // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
+ // that can be assigned to logical processors in a physical package.
+ // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
+ _, ebx, _, _ := cpuid(1)
+ logical := (ebx >> 16) & 0xff
+ return int(logical)
+ }
+ _, b, _, _ := cpuidex(0xb, 1)
+ return int(b & 0xffff)
+ case AMD, Hygon:
+ _, b, _, _ := cpuid(1)
+ return int((b >> 16) & 0xff)
+ default:
+ return 0
+ }
+}
+
+func familyModel() (family, model, stepping int) {
+ if maxFunctionID() < 0x1 {
+ return 0, 0, 0
+ }
+ eax, _, _, _ := cpuid(1)
+ // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
+ family = int((eax >> 8) & 0xf)
+ extFam := family == 0x6 // Intel is 0x6, needs extended model.
+ if family == 0xf {
+ // Add ExtFamily
+ family += int((eax >> 20) & 0xff)
+ extFam = true
+ }
+ // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
+ model = int((eax >> 4) & 0xf)
+ if extFam {
+ // Add ExtModel
+ model += int((eax >> 12) & 0xf0)
+ }
+ stepping = int(eax & 0xf)
+ return family, model, stepping
+}
+
+func physicalCores() int {
+ v, _ := vendorID()
+ switch v {
+ case Intel:
+ return logicalCores() / threadsPerCore()
+ case AMD, Hygon:
+ lc := logicalCores()
+ tpc := threadsPerCore()
+ if lc > 0 && tpc > 0 {
+ return lc / tpc
+ }
+
+ // The following is inaccurate on AMD EPYC 7742 64-Core Processor
+ if maxExtendedFunction() >= 0x80000008 {
+ _, _, c, _ := cpuid(0x80000008)
+ if c&0xff > 0 {
+ return int(c&0xff) + 1
+ }
+ }
+ }
+ return 0
+}
+
+// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
+var vendorMapping = map[string]Vendor{
+ "AMDisbetter!": AMD,
+ "AuthenticAMD": AMD,
+ "CentaurHauls": VIA,
+ "GenuineIntel": Intel,
+ "TransmetaCPU": Transmeta,
+ "GenuineTMx86": Transmeta,
+ "Geode by NSC": NSC,
+ "VIA VIA VIA ": VIA,
+ "KVMKVMKVMKVM": KVM,
+ "Microsoft Hv": MSVM,
+ "VMwareVMware": VMware,
+ "XenVMMXenVMM": XenHVM,
+ "bhyve bhyve ": Bhyve,
+ "HygonGenuine": Hygon,
+ "Vortex86 SoC": SiS,
+ "SiS SiS SiS ": SiS,
+ "RiseRiseRise": SiS,
+ "Genuine RDC": RDC,
+}
+
+func vendorID() (Vendor, string) {
+ _, b, c, d := cpuid(0)
+ v := string(valAsString(b, d, c))
+ vend, ok := vendorMapping[v]
+ if !ok {
+ return VendorUnknown, v
+ }
+ return vend, v
+}
+
+func cacheLine() int {
+ if maxFunctionID() < 0x1 {
+ return 0
+ }
+
+ _, ebx, _, _ := cpuid(1)
+ cache := (ebx & 0xff00) >> 5 // cflush size
+ if cache == 0 && maxExtendedFunction() >= 0x80000006 {
+ _, _, ecx, _ := cpuid(0x80000006)
+ cache = ecx & 0xff // cacheline size
+ }
+ // TODO: Read from Cache and TLB Information
+ return int(cache)
+}
+
+func (c *CPUInfo) cacheSize() {
+ c.Cache.L1D = -1
+ c.Cache.L1I = -1
+ c.Cache.L2 = -1
+ c.Cache.L3 = -1
+ vendor, _ := vendorID()
+ switch vendor {
+ case Intel:
+ if maxFunctionID() < 4 {
+ return
+ }
+ c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
+ for i := uint32(0); ; i++ {
+ eax, ebx, ecx, _ := cpuidex(4, i)
+ cacheType := eax & 15
+ if cacheType == 0 {
+ break
+ }
+ cacheLevel := (eax >> 5) & 7
+ coherency := int(ebx&0xfff) + 1
+ partitions := int((ebx>>12)&0x3ff) + 1
+ associativity := int((ebx>>22)&0x3ff) + 1
+ sets := int(ecx) + 1
+ size := associativity * partitions * coherency * sets
+ switch cacheLevel {
+ case 1:
+ if cacheType == 1 {
+ // 1 = Data Cache
+ c.Cache.L1D = size
+ } else if cacheType == 2 {
+ // 2 = Instruction Cache
+ c.Cache.L1I = size
+ } else {
+ if c.Cache.L1D < 0 {
+ c.Cache.L1I = size
+ }
+ if c.Cache.L1I < 0 {
+ c.Cache.L1I = size
+ }
+ }
+ case 2:
+ c.Cache.L2 = size
+ case 3:
+ c.Cache.L3 = size
+ }
+ }
+ case AMD, Hygon:
+ // Untested.
+ if maxExtendedFunction() < 0x80000005 {
+ return
+ }
+ _, _, ecx, edx := cpuid(0x80000005)
+ c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
+ c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
+
+ if maxExtendedFunction() < 0x80000006 {
+ return
+ }
+ _, _, ecx, _ = cpuid(0x80000006)
+ c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
+
+ // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
+ if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
+ return
+ }
+
+ // Xen Hypervisor is buggy and returns the same entry no matter ECX value.
+ // Hack: When we encounter the same entry 100 times we break.
+ nSame := 0
+ var last uint32
+ for i := uint32(0); i < math.MaxUint32; i++ {
+ eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
+
+ level := (eax >> 5) & 7
+ cacheNumSets := ecx + 1
+ cacheLineSize := 1 + (ebx & 2047)
+ cachePhysPartitions := 1 + ((ebx >> 12) & 511)
+ cacheNumWays := 1 + ((ebx >> 22) & 511)
+
+ typ := eax & 15
+ size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
+ if typ == 0 {
+ return
+ }
+
+ // Check for the same value repeated.
+ comb := eax ^ ebx ^ ecx
+ if comb == last {
+ nSame++
+ if nSame == 100 {
+ return
+ }
+ }
+ last = comb
+
+ switch level {
+ case 1:
+ switch typ {
+ case 1:
+ // Data cache
+ c.Cache.L1D = size
+ case 2:
+ // Inst cache
+ c.Cache.L1I = size
+ default:
+ if c.Cache.L1D < 0 {
+ c.Cache.L1I = size
+ }
+ if c.Cache.L1I < 0 {
+ c.Cache.L1I = size
+ }
+ }
+ case 2:
+ c.Cache.L2 = size
+ case 3:
+ c.Cache.L3 = size
+ }
+ }
+ }
+}
+
// SGXEPCSection describes one EPC section as reported by a type-1 sub-leaf
// of CPUID leaf 0x12.
type SGXEPCSection struct {
	// BaseAddress is assembled from EAX[31:12] (low part) and EBX[19:0]
	// (bits 32 and up).
	BaseAddress uint64
	// EPCSize is assembled from ECX[31:12] (low part) and EDX[19:0]
	// (bits 32 and up).
	EPCSize uint64
}
+
// SGXSupport holds the SGX details collected by hasSGX. When Available is
// false all other fields are left at their zero value.
type SGXSupport struct {
	// Available and LaunchControl are passed in by the caller of hasSGX.
	Available     bool
	LaunchControl bool
	// SGX1Supported and SGX2Supported are bits 0 and 1 of EAX from
	// CPUID leaf 0x12, sub-leaf 0.
	SGX1Supported bool
	SGX2Supported bool
	// MaxEnclaveSizeNot64 and MaxEnclaveSize64 are 1 shifted left by
	// EDX[7:0] and EDX[15:8] of the same sub-leaf, respectively.
	MaxEnclaveSizeNot64 int64
	MaxEnclaveSize64    int64
	// EPCSections lists the EPC sections found in sub-leaves 2 and up.
	EPCSections []SGXEPCSection
}
+
+func hasSGX(available, lc bool) (rval SGXSupport) {
+ rval.Available = available
+
+ if !available {
+ return
+ }
+
+ rval.LaunchControl = lc
+
+ a, _, _, d := cpuidex(0x12, 0)
+ rval.SGX1Supported = a&0x01 != 0
+ rval.SGX2Supported = a&0x02 != 0
+ rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
+ rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
+ rval.EPCSections = make([]SGXEPCSection, 0)
+
+ for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
+ eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
+ leafType := eax & 0xf
+
+ if leafType == 0 {
+ // Invalid subleaf, stop iterating
+ break
+ } else if leafType == 1 {
+ // EPC Section subleaf
+ baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
+ size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
+
+ section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
+ rval.EPCSections = append(rval.EPCSections, section)
+ }
+ }
+
+ return
+}
+
+func support() flagSet {
+ var fs flagSet
+ mfi := maxFunctionID()
+ vend, _ := vendorID()
+ if mfi < 0x1 {
+ return fs
+ }
+ family, model, _ := familyModel()
+
+ _, _, c, d := cpuid(1)
+ fs.setIf((d&(1<<0)) != 0, X87)
+ fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
+ fs.setIf((d&(1<<11)) != 0, SYSEE)
+ fs.setIf((d&(1<<15)) != 0, CMOV)
+ fs.setIf((d&(1<<23)) != 0, MMX)
+ fs.setIf((d&(1<<24)) != 0, FXSR)
+ fs.setIf((d&(1<<25)) != 0, FXSROPT)
+ fs.setIf((d&(1<<25)) != 0, SSE)
+ fs.setIf((d&(1<<26)) != 0, SSE2)
+ fs.setIf((c&1) != 0, SSE3)
+ fs.setIf((c&(1<<5)) != 0, VMX)
+ fs.setIf((c&(1<<9)) != 0, SSSE3)
+ fs.setIf((c&(1<<19)) != 0, SSE4)
+ fs.setIf((c&(1<<20)) != 0, SSE42)
+ fs.setIf((c&(1<<25)) != 0, AESNI)
+ fs.setIf((c&(1<<1)) != 0, CLMUL)
+ fs.setIf(c&(1<<22) != 0, MOVBE)
+ fs.setIf(c&(1<<23) != 0, POPCNT)
+ fs.setIf(c&(1<<30) != 0, RDRAND)
+
+ // This bit has been reserved by Intel & AMD for use by hypervisors,
+ // and indicates the presence of a hypervisor.
+ fs.setIf(c&(1<<31) != 0, HYPERVISOR)
+ fs.setIf(c&(1<<29) != 0, F16C)
+ fs.setIf(c&(1<<13) != 0, CX16)
+
+ if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
+ fs.setIf(threadsPerCore() > 1, HTT)
+ }
+ if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
+ fs.setIf(threadsPerCore() > 1, HTT)
+ }
+ fs.setIf(c&1<<26 != 0, XSAVE)
+ fs.setIf(c&1<<27 != 0, OSXSAVE)
+ // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
+ const avxCheck = 1<<26 | 1<<27 | 1<<28
+ if c&avxCheck == avxCheck {
+ // Check for OS support
+ eax, _ := xgetbv(0)
+ if (eax & 0x6) == 0x6 {
+ fs.set(AVX)
+ switch vend {
+ case Intel:
+ // Older than Haswell.
+ fs.setIf(family == 6 && model < 60, AVXSLOW)
+ case AMD:
+ // Older than Zen 2
+ fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
+ }
+ }
+ }
+ // FMA3 can be used with SSE registers, so no OS support is strictly needed.
+ // fma3 and OSXSAVE needed.
+ const fma3Check = 1<<12 | 1<<27
+ fs.setIf(c&fma3Check == fma3Check, FMA3)
+
+ // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
+ if mfi >= 7 {
+ _, ebx, ecx, edx := cpuidex(7, 0)
+ if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
+ fs.set(AVX2)
+ }
+ // CPUID.(EAX=7, ECX=0).EBX
+ if (ebx & 0x00000008) != 0 {
+ fs.set(BMI1)
+ fs.setIf((ebx&0x00000100) != 0, BMI2)
+ }
+ fs.setIf(ebx&(1<<2) != 0, SGX)
+ fs.setIf(ebx&(1<<4) != 0, HLE)
+ fs.setIf(ebx&(1<<9) != 0, ERMS)
+ fs.setIf(ebx&(1<<11) != 0, RTM)
+ fs.setIf(ebx&(1<<14) != 0, MPX)
+ fs.setIf(ebx&(1<<18) != 0, RDSEED)
+ fs.setIf(ebx&(1<<19) != 0, ADX)
+ fs.setIf(ebx&(1<<29) != 0, SHA)
+
+ // CPUID.(EAX=7, ECX=0).ECX
+ fs.setIf(ecx&(1<<5) != 0, WAITPKG)
+ fs.setIf(ecx&(1<<7) != 0, CETSS)
+ fs.setIf(ecx&(1<<8) != 0, GFNI)
+ fs.setIf(ecx&(1<<9) != 0, VAES)
+ fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
+ fs.setIf(ecx&(1<<13) != 0, TME)
+ fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
+ fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
+ fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
+ fs.setIf(ecx&(1<<29) != 0, ENQCMD)
+ fs.setIf(ecx&(1<<30) != 0, SGXLC)
+
+ // CPUID.(EAX=7, ECX=0).EDX
+ fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
+ fs.setIf(edx&(1<<14) != 0, SERIALIZE)
+ fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
+ fs.setIf(edx&(1<<18) != 0, PCONFIG)
+ fs.setIf(edx&(1<<20) != 0, CETIBT)
+ fs.setIf(edx&(1<<26) != 0, IBPB)
+ fs.setIf(edx&(1<<27) != 0, STIBP)
+
+ // CPUID.(EAX=7, ECX=1)
+ eax1, _, _, _ := cpuidex(7, 1)
+ fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
+ fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
+ fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
+ fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
+ fs.setIf(eax1&(1<<22) != 0, HRESET)
+ fs.setIf(eax1&(1<<26) != 0, LAM)
+
+ // Only detect AVX-512 features if XGETBV is supported
+ if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
+ // Check for OS support
+ eax, _ := xgetbv(0)
+
+ // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+ // ZMM16-ZMM31 state are enabled by OS)
+ /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
+ hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
+ if runtime.GOOS == "darwin" {
+ hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
+ }
+ if hasAVX512 {
+ fs.setIf(ebx&(1<<16) != 0, AVX512F)
+ fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
+ fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
+ fs.setIf(ebx&(1<<26) != 0, AVX512PF)
+ fs.setIf(ebx&(1<<27) != 0, AVX512ER)
+ fs.setIf(ebx&(1<<28) != 0, AVX512CD)
+ fs.setIf(ebx&(1<<30) != 0, AVX512BW)
+ fs.setIf(ebx&(1<<31) != 0, AVX512VL)
+ // ecx
+ fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
+ fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
+ fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
+ fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
+ fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
+ // edx
+ fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
+ fs.setIf(edx&(1<<22) != 0, AMXBF16)
+ fs.setIf(edx&(1<<23) != 0, AVX512FP16)
+ fs.setIf(edx&(1<<24) != 0, AMXTILE)
+ fs.setIf(edx&(1<<25) != 0, AMXINT8)
+ // eax1 = CPUID.(EAX=7, ECX=1).EAX
+ fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
+ }
+ }
+ }
+ // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
+ // EAX
+ // Bit 00: XSAVEOPT is available.
+ // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
+ // Bit 02: Supports XGETBV with ECX = 1 if set.
+ // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
+ // Bits 31 - 04: Reserved.
+ // EBX
+ // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS.
+ // ECX
+ // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
+ // EDX?
+ // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
+ if mfi >= 0xd {
+ if fs.inSet(XSAVE) {
+ eax, _, _, _ := cpuidex(0xd, 1)
+ fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
+ fs.setIf(eax&(1<<1) != 0, XSAVEC)
+ fs.setIf(eax&(1<<2) != 0, XGETBV1)
+ fs.setIf(eax&(1<<3) != 0, XSAVES)
+ }
+ }
+ if maxExtendedFunction() >= 0x80000001 {
+ _, _, c, d := cpuid(0x80000001)
+ if (c & (1 << 5)) != 0 {
+ fs.set(LZCNT)
+ fs.set(POPCNT)
+ }
+ // ECX
+ fs.setIf((c&(1<<0)) != 0, LAHF)
+ fs.setIf((c&(1<<2)) != 0, SVM)
+ fs.setIf((c&(1<<6)) != 0, SSE4A)
+ fs.setIf((c&(1<<10)) != 0, IBS)
+ fs.setIf((c&(1<<22)) != 0, TOPEXT)
+
+ // EDX
+ fs.setIf(d&(1<<11) != 0, SYSCALL)
+ fs.setIf(d&(1<<20) != 0, NX)
+ fs.setIf(d&(1<<22) != 0, MMXEXT)
+ fs.setIf(d&(1<<23) != 0, MMX)
+ fs.setIf(d&(1<<24) != 0, FXSR)
+ fs.setIf(d&(1<<25) != 0, FXSROPT)
+ fs.setIf(d&(1<<27) != 0, RDTSCP)
+ fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
+ fs.setIf(d&(1<<31) != 0, AMD3DNOW)
+
+ /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
+ * used unless the OS has AVX support. */
+ if fs.inSet(AVX) {
+ fs.setIf((c&(1<<11)) != 0, XOP)
+ fs.setIf((c&(1<<16)) != 0, FMA4)
+ }
+
+ }
+ if maxExtendedFunction() >= 0x80000007 {
+ _, b, _, d := cpuid(0x80000007)
+ fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
+ fs.setIf((b&(1<<1)) != 0, SUCCOR)
+ fs.setIf((b&(1<<2)) != 0, HWA)
+ fs.setIf((d&(1<<9)) != 0, CPBOOST)
+ }
+
+ if maxExtendedFunction() >= 0x80000008 {
+ _, b, _, _ := cpuid(0x80000008)
+ fs.setIf((b&(1<<9)) != 0, WBNOINVD)
+ fs.setIf((b&(1<<8)) != 0, MCOMMIT)
+ fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
+ fs.setIf((b&(1<<4)) != 0, RDPRU)
+ fs.setIf((b&(1<<3)) != 0, INVLPGB)
+ fs.setIf((b&(1<<1)) != 0, MSRIRC)
+ fs.setIf((b&(1<<0)) != 0, CLZERO)
+ }
+
+ if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
+ _, _, _, edx := cpuid(0x8000000A)
+ fs.setIf((edx>>0)&1 == 1, SVMNP)
+ fs.setIf((edx>>1)&1 == 1, LBRVIRT)
+ fs.setIf((edx>>2)&1 == 1, SVML)
+ fs.setIf((edx>>3)&1 == 1, NRIPS)
+ fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
+ fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
+ fs.setIf((edx>>6)&1 == 1, SVMFBASID)
+ fs.setIf((edx>>7)&1 == 1, SVMDA)
+ fs.setIf((edx>>10)&1 == 1, SVMPF)
+ fs.setIf((edx>>12)&1 == 1, SVMPFT)
+ }
+
+ if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
+ eax, _, _, _ := cpuid(0x8000001b)
+ fs.setIf((eax>>0)&1 == 1, IBSFFV)
+ fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
+ fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
+ fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
+ fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
+ fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
+ fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
+ fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
+ }
+
+ if maxExtendedFunction() >= 0x8000001f && vend == AMD {
+ a, _, _, _ := cpuid(0x8000001f)
+ fs.setIf((a>>0)&1 == 1, SME)
+ fs.setIf((a>>1)&1 == 1, SEV)
+ fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
+ fs.setIf((a>>3)&1 == 1, SEV_ES)
+ fs.setIf((a>>4)&1 == 1, SEV_SNP)
+ fs.setIf((a>>5)&1 == 1, VMPL)
+ fs.setIf((a>>10)&1 == 1, SME_COHERENT)
+ fs.setIf((a>>11)&1 == 1, SEV_64BIT)
+ fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
+ fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
+ fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
+ fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
+ fs.setIf((a>>16)&1 == 1, VTE)
+ fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
+ }
+
+ return fs
+}
+
// valAsString unpacks 32-bit register values into bytes, least significant
// byte first, and returns them as one byte slice. The result is cut at the
// first zero byte, if any.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 4*len(values))
	for i, v := range values {
		off := 4 * i
		out[off], out[off+1], out[off+2], out[off+3] = byte(v), byte(v>>8), byte(v>>16), byte(v>>24)
		// Stop at the first NUL inside the word just written.
		for j := 0; j < 4; j++ {
			if out[off+j] == 0 {
				return out[:off+j]
			}
		}
	}
	return out
}
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
new file mode 100644
index 000000000..8587c3a1f
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
@@ -0,0 +1,47 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build 386,!gccgo,!noasm,!appengine
+
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Executes CPUID with AX = op and CX cleared, then stores AX, BX, CX and DX
// into the four results.
TEXT ·asmCpuid(SB), 7, $0
	XORL CX, CX // sub-leaf selector is always 0 here
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+4(FP)
	MOVL BX, ebx+8(FP)
	MOVL CX, ecx+12(FP)
	MOVL DX, edx+16(FP)
	RET
+
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// Executes CPUID with AX = op and CX = op2, then stores AX, BX, CX and DX
// into the four results.
TEXT ·asmCpuidex(SB), 7, $0
	MOVL op+0(FP), AX
	MOVL op2+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
+
// func asmXgetbv(index uint32) (eax, edx uint32)
// Executes XGETBV with CX = index and returns AX and DX.
// The instruction is emitted as raw bytes.
TEXT ·asmXgetbv(SB), 7, $0
	MOVL index+0(FP), CX
	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
	MOVL AX, eax+4(FP)
	MOVL DX, edx+8(FP)
	RET
+
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// Executes RDTSCP (emitted as raw bytes) and stores AX, BX, CX and DX into
// the four results.
// NOTE(review): RDTSCP is presumably not defined to write BX, so the ebx
// result would be whatever BX held on entry — confirm before relying on it.
TEXT ·asmRdtscpAsm(SB), 7, $0
	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
	MOVL AX, eax+0(FP)
	MOVL BX, ebx+4(FP)
	MOVL CX, ecx+8(FP)
	MOVL DX, edx+12(FP)
	RET
+
// func asmDarwinHasAVX512() bool
// Always returns false on 386.
// The result is a single byte, so it is written with MOVB under the name
// ret and the frame is declared as $0-1, matching the amd64 implementation.
TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
	MOVB $0, ret+0(FP)
	RET
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
new file mode 100644
index 000000000..bc11f8942
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
@@ -0,0 +1,72 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build amd64,!gccgo,!noasm,!appengine
+
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Executes CPUID with AX = op and CX cleared, then stores AX, BX, CX and DX
// into the four results, which start at offset 8.
TEXT ·asmCpuid(SB), 7, $0
	XORQ CX, CX // sub-leaf selector is always 0 here
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
+
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// Executes CPUID with AX = op and CX = op2, then stores AX, BX, CX and DX
// into the four results.
TEXT ·asmCpuidex(SB), 7, $0
	MOVL op+0(FP), AX
	MOVL op2+4(FP), CX
	CPUID
	MOVL AX, eax+8(FP)
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
+
// func asmXgetbv(index uint32) (eax, edx uint32)
// Executes XGETBV with CX = index and returns AX and DX.
// The instruction is emitted as raw bytes.
TEXT ·asmXgetbv(SB), 7, $0
	MOVL index+0(FP), CX
	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
	MOVL AX, eax+8(FP)
	MOVL DX, edx+12(FP)
	RET
+
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// Executes RDTSCP (emitted as raw bytes) and stores AX, BX, CX and DX into
// the four results.
// NOTE(review): RDTSCP is presumably not defined to write BX, so the ebx
// result would be whatever BX held on entry — confirm before relying on it.
TEXT ·asmRdtscpAsm(SB), 7, $0
	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
	MOVL AX, eax+0(FP)
	MOVL BX, ebx+4(FP)
	MOVL CX, ecx+8(FP)
	MOVL DX, edx+12(FP)
	RET
+
// From https://go-review.googlesource.com/c/sys/+/285572/
// func asmDarwinHasAVX512() bool
// Returns false unless built for darwin/amd64. There it reads the commpage:
// the result is true only when the commpage version is at least 13 and the
// hasAVX512F bit is set in the 64-bit capabilities word.
TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
	MOVB $0, ret+0(FP) // default to false

#ifdef GOOS_darwin // return if not darwin
#ifdef GOARCH_amd64 // return if not amd64
// These values from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address 0x00007fffffe00000
#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
#define commpage64_version (commpage64_base_address+0x01E)
#define hasAVX512F 0x0000004000000000
	MOVQ $commpage64_version, BX
	MOVW (BX), AX
	CMPW AX, $13 // versions < 13 do not support AVX512
	JL no_avx512
	MOVQ $commpage64_cpu_capabilities64, BX
	MOVQ (BX), AX
	MOVQ $hasAVX512F, CX
	ANDQ CX, AX // zero result means the capability bit is clear
	JZ no_avx512
	MOVB $1, ret+0(FP)

no_avx512:
#endif
#endif
	RET
+
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s
new file mode 100644
index 000000000..b31d6aec4
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s
@@ -0,0 +1,26 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build arm64,!gccgo,!noasm,!appengine
+
+// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
+
// func getMidr() (midr uint64)
// Reads MIDR_EL1 into R0 (the MRS instruction is emitted as a raw word) and
// returns it.
TEXT ·getMidr(SB), 7, $0
	WORD $0xd5380000 // mrs x0, midr_el1 /* Main ID Register */
	MOVD R0, midr+0(FP)
	RET
+
// func getProcFeatures() (procFeatures uint64)
// Reads ID_AA64PFR0_EL1 into R0 (the MRS instruction is emitted as a raw
// word) and returns it.
TEXT ·getProcFeatures(SB), 7, $0
	WORD $0xd5380400 // mrs x0, id_aa64pfr0_el1 /* Processor Feature Register 0 */
	MOVD R0, procFeatures+0(FP)
	RET
+
// func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
// Reads ID_AA64ISAR0_EL1 into R0 and ID_AA64ISAR1_EL1 into R1 (both MRS
// instructions are emitted as raw words) and returns them in that order.
TEXT ·getInstAttributes(SB), 7, $0
	WORD $0xd5380600 // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */
	WORD $0xd5380621 // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */
	MOVD R0, instAttrReg0+0(FP)
	MOVD R1, instAttrReg1+8(FP)
	RET
+
diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
new file mode 100644
index 000000000..9a53504a0
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
@@ -0,0 +1,247 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build arm64 && !gccgo && !noasm && !appengine
+// +build arm64,!gccgo,!noasm,!appengine
+
+package cpuid
+
+import "runtime"
+
// The functions below have no Go body; they are implemented in cpuid_arm64.s.

// getMidr returns the value of MIDR_EL1 (Main ID Register).
func getMidr() (midr uint64)

// getProcFeatures returns the value of ID_AA64PFR0_EL1
// (Processor Feature Register 0).
func getProcFeatures() (procFeatures uint64)

// getInstAttributes returns the values of ID_AA64ISAR0_EL1 and
// ID_AA64ISAR1_EL1 (Instruction Set Attribute Registers 0 and 1).
func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
+
+func initCPU() {
+ cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
+ cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
+ xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
+ rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
+}
+
+func addInfo(c *CPUInfo, safe bool) {
+ // Seems to be safe to assume on ARM64
+ c.CacheLine = 64
+ detectOS(c)
+
+ // ARM64 disabled since it may crash if interrupt is not intercepted by OS.
+ if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" {
+ return
+ }
+ midr := getMidr()
+
+ // MIDR_EL1 - Main ID Register
+ // https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1
+ // x--------------------------------------------------x
+ // | Name | bits | visible |
+ // |--------------------------------------------------|
+ // | Implementer | [31-24] | y |
+ // |--------------------------------------------------|
+ // | Variant | [23-20] | y |
+ // |--------------------------------------------------|
+ // | Architecture | [19-16] | y |
+ // |--------------------------------------------------|
+ // | PartNum | [15-4] | y |
+ // |--------------------------------------------------|
+ // | Revision | [3-0] | y |
+ // x--------------------------------------------------x
+
+ switch (midr >> 24) & 0xff {
+ case 0xC0:
+ c.VendorString = "Ampere Computing"
+ c.VendorID = Ampere
+ case 0x41:
+ c.VendorString = "Arm Limited"
+ c.VendorID = ARM
+ case 0x42:
+ c.VendorString = "Broadcom Corporation"
+ c.VendorID = Broadcom
+ case 0x43:
+ c.VendorString = "Cavium Inc"
+ c.VendorID = Cavium
+ case 0x44:
+ c.VendorString = "Digital Equipment Corporation"
+ c.VendorID = DEC
+ case 0x46:
+ c.VendorString = "Fujitsu Ltd"
+ c.VendorID = Fujitsu
+ case 0x49:
+ c.VendorString = "Infineon Technologies AG"
+ c.VendorID = Infineon
+ case 0x4D:
+ c.VendorString = "Motorola or Freescale Semiconductor Inc"
+ c.VendorID = Motorola
+ case 0x4E:
+ c.VendorString = "NVIDIA Corporation"
+ c.VendorID = NVIDIA
+ case 0x50:
+ c.VendorString = "Applied Micro Circuits Corporation"
+ c.VendorID = AMCC
+ case 0x51:
+ c.VendorString = "Qualcomm Inc"
+ c.VendorID = Qualcomm
+ case 0x56:
+ c.VendorString = "Marvell International Ltd"
+ c.VendorID = Marvell
+ case 0x69:
+ c.VendorString = "Intel Corporation"
+ c.VendorID = Intel
+ }
+
+ // Lower 4 bits: Architecture
+ // Architecture Meaning
+ // 0b0001 Armv4.
+ // 0b0010 Armv4T.
+ // 0b0011 Armv5 (obsolete).
+ // 0b0100 Armv5T.
+ // 0b0101 Armv5TE.
+ // 0b0110 Armv5TEJ.
+ // 0b0111 Armv6.
+ // 0b1111 Architectural features are individually identified in the ID_* registers, see 'ID registers'.
+ // Upper 4 bit: Variant
+ // An IMPLEMENTATION DEFINED variant number.
+ // Typically, this field is used to distinguish between different product variants, or major revisions of a product.
+ c.Family = int(midr>>16) & 0xff
+
+ // PartNum, bits [15:4]
+ // An IMPLEMENTATION DEFINED primary part number for the device.
+ // On processors implemented by Arm, if the top four bits of the primary
+ // part number are 0x0 or 0x7, the variant and architecture are encoded differently.
+ // Revision, bits [3:0]
+ // An IMPLEMENTATION DEFINED revision number for the device.
+ c.Model = int(midr) & 0xffff
+
+ procFeatures := getProcFeatures()
+
+ // ID_AA64PFR0_EL1 - Processor Feature Register 0
+ // x--------------------------------------------------x
+ // | Name | bits | visible |
+ // |--------------------------------------------------|
+ // | DIT | [51-48] | y |
+ // |--------------------------------------------------|
+ // | SVE | [35-32] | y |
+ // |--------------------------------------------------|
+ // | GIC | [27-24] | n |
+ // |--------------------------------------------------|
+ // | AdvSIMD | [23-20] | y |
+ // |--------------------------------------------------|
+ // | FP | [19-16] | y |
+ // |--------------------------------------------------|
+ // | EL3 | [15-12] | n |
+ // |--------------------------------------------------|
+ // | EL2 | [11-8] | n |
+ // |--------------------------------------------------|
+ // | EL1 | [7-4] | n |
+ // |--------------------------------------------------|
+ // | EL0 | [3-0] | n |
+ // x--------------------------------------------------x
+
+ var f flagSet
+ // if procFeatures&(0xf<<48) != 0 {
+ // fmt.Println("DIT")
+ // }
+ f.setIf(procFeatures&(0xf<<32) != 0, SVE)
+ if procFeatures&(0xf<<20) != 15<<20 {
+ f.set(ASIMD)
+ // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
+ // 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
+ f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP)
+ }
+ f.setIf(procFeatures&(0xf<<16) != 0, FP)
+
+ instAttrReg0, instAttrReg1 := getInstAttributes()
+
+ // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+ //
+ // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
+ // x--------------------------------------------------x
+ // | Name | bits | visible |
+ // |--------------------------------------------------|
+ // | TS | [55-52] | y |
+ // |--------------------------------------------------|
+ // | FHM | [51-48] | y |
+ // |--------------------------------------------------|
+ // | DP | [47-44] | y |
+ // |--------------------------------------------------|
+ // | SM4 | [43-40] | y |
+ // |--------------------------------------------------|
+ // | SM3 | [39-36] | y |
+ // |--------------------------------------------------|
+ // | SHA3 | [35-32] | y |
+ // |--------------------------------------------------|
+ // | RDM | [31-28] | y |
+ // |--------------------------------------------------|
+ // | ATOMICS | [23-20] | y |
+ // |--------------------------------------------------|
+ // | CRC32 | [19-16] | y |
+ // |--------------------------------------------------|
+ // | SHA2 | [15-12] | y |
+ // |--------------------------------------------------|
+ // | SHA1 | [11-8] | y |
+ // |--------------------------------------------------|
+ // | AES | [7-4] | y |
+ // x--------------------------------------------------x
+
+ // if instAttrReg0&(0xf<<52) != 0 {
+ // fmt.Println("TS")
+ // }
+ // if instAttrReg0&(0xf<<48) != 0 {
+ // fmt.Println("FHM")
+ // }
+ f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP)
+ f.setIf(instAttrReg0&(0xf<<40) != 0, SM4)
+ f.setIf(instAttrReg0&(0xf<<36) != 0, SM3)
+ f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3)
+ f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM)
+ f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS)
+ f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32)
+ f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2)
+ // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+ // 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
+ f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512)
+ f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1)
+ f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM)
+ // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+ // 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
+ f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL)
+
+ // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
+ //
+ // ID_AA64ISAR1_EL1 - Instruction set attribute register 1
+ // x--------------------------------------------------x
+ // | Name | bits | visible |
+ // |--------------------------------------------------|
+ // | GPI | [31-28] | y |
+ // |--------------------------------------------------|
+ // | GPA | [27-24] | y |
+ // |--------------------------------------------------|
+ // | LRCPC | [23-20] | y |
+ // |--------------------------------------------------|
+ // | FCMA | [19-16] | y |
+ // |--------------------------------------------------|
+ // | JSCVT | [15-12] | y |
+ // |--------------------------------------------------|
+ // | API | [11-8] | y |
+ // |--------------------------------------------------|
+ // | APA | [7-4] | y |
+ // |--------------------------------------------------|
+ // | DPB | [3-0] | y |
+ // x--------------------------------------------------x
+
+ // if instAttrReg1&(0xf<<28) != 0 {
+ // fmt.Println("GPI")
+ // }
+ f.setIf(instAttrReg1&(0xf<<28) != 24, GPA)
+ f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC)
+ f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA)
+ f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT)
+ // if instAttrReg1&(0xf<<8) != 0 {
+ // fmt.Println("API")
+ // }
+ // if instAttrReg1&(0xf<<4) != 0 {
+ // fmt.Println("APA")
+ // }
+ f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP)
+
+ // Store
+ c.featureSet.or(f)
+}
diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_ref.go b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go
new file mode 100644
index 000000000..9636c2bc1
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go
@@ -0,0 +1,15 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine
+// +build !amd64,!386,!arm64 gccgo noasm appengine
+
+package cpuid
+
+// initCPU installs stub probes for builds selected by this file's build
+// constraint (unsupported architecture, gccgo, noasm or appengine).
+// Every stub ignores its arguments and reports all-zero register values.
+func initCPU() {
+	cpuid = func(uint32) (uint32, uint32, uint32, uint32) {
+		return 0, 0, 0, 0
+	}
+	cpuidex = func(uint32, uint32) (uint32, uint32, uint32, uint32) {
+		return 0, 0, 0, 0
+	}
+	xgetbv = func(uint32) (uint32, uint32) {
+		return 0, 0
+	}
+	rdtscpAsm = func() (uint32, uint32, uint32, uint32) {
+		return 0, 0, 0, 0
+	}
+}
+
+// addInfo is a no-op in this build configuration: info is left untouched and
+// safe is ignored.
+func addInfo(info *CPUInfo, safe bool) {}
diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
new file mode 100644
index 000000000..c946824ec
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
@@ -0,0 +1,36 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine)
+// +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine
+
+package cpuid
+
+// The declarations below have no Go bodies; their implementations live outside
+// this file (presumably in the package's cpuid_386.s / cpuid_amd64.s assembly
+// sources — confirm against those files).
+func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+func asmXgetbv(index uint32) (eax, edx uint32)
+func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+func asmDarwinHasAVX512() bool
+
+// initCPU wires the package's probe hooks to the asm* functions declared in
+// this file.
+func initCPU() {
+	cpuid, cpuidex = asmCpuid, asmCpuidex
+	xgetbv, rdtscpAsm = asmXgetbv, asmRdtscpAsm
+	darwinHasAVX512 = asmDarwinHasAVX512
+}
+
+// addInfo fills c with the results of the package's x86 detection helpers:
+// maximum function IDs, brand name, cache line size, family/model/stepping,
+// feature set, SGX details, core/thread counts and vendor, and finally lets c
+// compute its cache sizes and frequencies.
+//
+// The safe parameter is not used by this implementation.
+func addInfo(c *CPUInfo, safe bool) {
+ c.maxFunc = maxFunctionID()
+ c.maxExFunc = maxExtendedFunction()
+ c.BrandName = brandName()
+ c.CacheLine = cacheLine()
+ c.Family, c.Model, c.Stepping = familyModel()
+ c.featureSet = support()
+ // SGX details are only queried with the SGX/SGXLC flags detected just above.
+ c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
+ c.ThreadsPerCore = threadsPerCore()
+ c.LogicalCores = logicalCores()
+ c.PhysicalCores = physicalCores()
+ c.VendorID, c.VendorString = vendorID()
+ // NOTE(review): these two methods run last and presumably read the fields
+ // populated above — keep the ordering unless verified otherwise.
+ c.cacheSize()
+ c.frequencies()
+}
diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
new file mode 100644
index 000000000..d12e547c4
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
@@ -0,0 +1,235 @@
+// Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT.
+
+package cpuid
+
+import "strconv"
+
+// _ is a compile-time guard. Each statement indexes a one-element array with
+// (constant - expected value), so compilation fails with an "invalid array
+// index" error as soon as a FeatureID constant no longer has the value the
+// generated name table was built for.
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[ADX-1]
+ _ = x[AESNI-2]
+ _ = x[AMD3DNOW-3]
+ _ = x[AMD3DNOWEXT-4]
+ _ = x[AMXBF16-5]
+ _ = x[AMXINT8-6]
+ _ = x[AMXTILE-7]
+ _ = x[AVX-8]
+ _ = x[AVX2-9]
+ _ = x[AVX512BF16-10]
+ _ = x[AVX512BITALG-11]
+ _ = x[AVX512BW-12]
+ _ = x[AVX512CD-13]
+ _ = x[AVX512DQ-14]
+ _ = x[AVX512ER-15]
+ _ = x[AVX512F-16]
+ _ = x[AVX512FP16-17]
+ _ = x[AVX512IFMA-18]
+ _ = x[AVX512PF-19]
+ _ = x[AVX512VBMI-20]
+ _ = x[AVX512VBMI2-21]
+ _ = x[AVX512VL-22]
+ _ = x[AVX512VNNI-23]
+ _ = x[AVX512VP2INTERSECT-24]
+ _ = x[AVX512VPOPCNTDQ-25]
+ _ = x[AVXSLOW-26]
+ _ = x[AVXVNNI-27]
+ _ = x[BMI1-28]
+ _ = x[BMI2-29]
+ _ = x[CETIBT-30]
+ _ = x[CETSS-31]
+ _ = x[CLDEMOTE-32]
+ _ = x[CLMUL-33]
+ _ = x[CLZERO-34]
+ _ = x[CMOV-35]
+ _ = x[CMPSB_SCADBS_SHORT-36]
+ _ = x[CMPXCHG8-37]
+ _ = x[CPBOOST-38]
+ _ = x[CX16-39]
+ _ = x[ENQCMD-40]
+ _ = x[ERMS-41]
+ _ = x[F16C-42]
+ _ = x[FMA3-43]
+ _ = x[FMA4-44]
+ _ = x[FXSR-45]
+ _ = x[FXSROPT-46]
+ _ = x[GFNI-47]
+ _ = x[HLE-48]
+ _ = x[HRESET-49]
+ _ = x[HTT-50]
+ _ = x[HWA-51]
+ _ = x[HYPERVISOR-52]
+ _ = x[IBPB-53]
+ _ = x[IBS-54]
+ _ = x[IBSBRNTRGT-55]
+ _ = x[IBSFETCHSAM-56]
+ _ = x[IBSFFV-57]
+ _ = x[IBSOPCNT-58]
+ _ = x[IBSOPCNTEXT-59]
+ _ = x[IBSOPSAM-60]
+ _ = x[IBSRDWROPCNT-61]
+ _ = x[IBSRIPINVALIDCHK-62]
+ _ = x[IBS_PREVENTHOST-63]
+ _ = x[INT_WBINVD-64]
+ _ = x[INVLPGB-65]
+ _ = x[LAHF-66]
+ _ = x[LAM-67]
+ _ = x[LBRVIRT-68]
+ _ = x[LZCNT-69]
+ _ = x[MCAOVERFLOW-70]
+ _ = x[MCOMMIT-71]
+ _ = x[MMX-72]
+ _ = x[MMXEXT-73]
+ _ = x[MOVBE-74]
+ _ = x[MOVDIR64B-75]
+ _ = x[MOVDIRI-76]
+ _ = x[MOVSB_ZL-77]
+ _ = x[MPX-78]
+ _ = x[MSRIRC-79]
+ _ = x[MSR_PAGEFLUSH-80]
+ _ = x[NRIPS-81]
+ _ = x[NX-82]
+ _ = x[OSXSAVE-83]
+ _ = x[PCONFIG-84]
+ _ = x[POPCNT-85]
+ _ = x[RDPRU-86]
+ _ = x[RDRAND-87]
+ _ = x[RDSEED-88]
+ _ = x[RDTSCP-89]
+ _ = x[RTM-90]
+ _ = x[RTM_ALWAYS_ABORT-91]
+ _ = x[SERIALIZE-92]
+ _ = x[SEV-93]
+ _ = x[SEV_64BIT-94]
+ _ = x[SEV_ALTERNATIVE-95]
+ _ = x[SEV_DEBUGSWAP-96]
+ _ = x[SEV_ES-97]
+ _ = x[SEV_RESTRICTED-98]
+ _ = x[SEV_SNP-99]
+ _ = x[SGX-100]
+ _ = x[SGXLC-101]
+ _ = x[SHA-102]
+ _ = x[SME-103]
+ _ = x[SME_COHERENT-104]
+ _ = x[SSE-105]
+ _ = x[SSE2-106]
+ _ = x[SSE3-107]
+ _ = x[SSE4-108]
+ _ = x[SSE42-109]
+ _ = x[SSE4A-110]
+ _ = x[SSSE3-111]
+ _ = x[STIBP-112]
+ _ = x[STOSB_SHORT-113]
+ _ = x[SUCCOR-114]
+ _ = x[SVM-115]
+ _ = x[SVMDA-116]
+ _ = x[SVMFBASID-117]
+ _ = x[SVML-118]
+ _ = x[SVMNP-119]
+ _ = x[SVMPF-120]
+ _ = x[SVMPFT-121]
+ _ = x[SYSCALL-122]
+ _ = x[SYSEE-123]
+ _ = x[TBM-124]
+ _ = x[TOPEXT-125]
+ _ = x[TME-126]
+ _ = x[TSCRATEMSR-127]
+ _ = x[TSXLDTRK-128]
+ _ = x[VAES-129]
+ _ = x[VMCBCLEAN-130]
+ _ = x[VMPL-131]
+ _ = x[VMSA_REGPROT-132]
+ _ = x[VMX-133]
+ _ = x[VPCLMULQDQ-134]
+ _ = x[VTE-135]
+ _ = x[WAITPKG-136]
+ _ = x[WBNOINVD-137]
+ _ = x[X87-138]
+ _ = x[XGETBV1-139]
+ _ = x[XOP-140]
+ _ = x[XSAVE-141]
+ _ = x[XSAVEC-142]
+ _ = x[XSAVEOPT-143]
+ _ = x[XSAVES-144]
+ _ = x[AESARM-145]
+ _ = x[ARMCPUID-146]
+ _ = x[ASIMD-147]
+ _ = x[ASIMDDP-148]
+ _ = x[ASIMDHP-149]
+ _ = x[ASIMDRDM-150]
+ _ = x[ATOMICS-151]
+ _ = x[CRC32-152]
+ _ = x[DCPOP-153]
+ _ = x[EVTSTRM-154]
+ _ = x[FCMA-155]
+ _ = x[FP-156]
+ _ = x[FPHP-157]
+ _ = x[GPA-158]
+ _ = x[JSCVT-159]
+ _ = x[LRCPC-160]
+ _ = x[PMULL-161]
+ _ = x[SHA1-162]
+ _ = x[SHA2-163]
+ _ = x[SHA3-164]
+ _ = x[SHA512-165]
+ _ = x[SM3-166]
+ _ = x[SM4-167]
+ _ = x[SVE-168]
+ _ = x[lastID-169]
+ _ = x[firstID-0]
+}
+
+const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXSLOWAVXVNNIBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCX16ENQCMDERMSF16CFMA3FMA4FXSRFXSROPTGFNIHLEHRESETHTTHWAHYPERVISORIBPBIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_PREVENTHOSTINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCOMMITMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTOPEXTTMETSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
+
+var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 58, 62, 72, 84, 92, 100, 108, 116, 123, 133, 143, 151, 161, 172, 180, 190, 208, 223, 230, 237, 241, 245, 251, 256, 264, 269, 275, 279, 297, 305, 312, 316, 322, 326, 330, 334, 338, 342, 349, 353, 356, 362, 365, 368, 378, 382, 385, 395, 406, 412, 420, 431, 439, 451, 467, 482, 492, 499, 503, 506, 513, 518, 529, 536, 539, 545, 550, 559, 566, 574, 577, 583, 596, 601, 603, 610, 617, 623, 628, 634, 640, 646, 649, 665, 674, 677, 686, 701, 714, 720, 734, 741, 744, 749, 752, 755, 767, 770, 774, 778, 782, 787, 792, 797, 802, 813, 819, 822, 827, 836, 840, 845, 850, 856, 863, 868, 871, 877, 880, 890, 898, 902, 911, 915, 927, 930, 940, 943, 950, 958, 961, 968, 971, 976, 982, 990, 996, 1002, 1010, 1015, 1022, 1029, 1037, 1044, 1049, 1054, 1061, 1065, 1067, 1071, 1074, 1079, 1084, 1089, 1093, 1097, 1101, 1107, 1110, 1113, 1116, 1122}
+
+// String returns the name of i, sliced out of _FeatureID_name using the
+// offsets in _FeatureID_index. Values outside the table are rendered as
+// "FeatureID(<decimal value>)".
+func (i FeatureID) String() string {
+ if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
+ return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ // Entry i of the index is the start offset of the name, entry i+1 its end.
+ return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]]
+}
+// _ is a compile-time guard. Each statement indexes a one-element array with
+// (constant - expected value), so compilation fails with an "invalid array
+// index" error as soon as a Vendor constant no longer has the value the
+// generated name table was built for.
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[VendorUnknown-0]
+ _ = x[Intel-1]
+ _ = x[AMD-2]
+ _ = x[VIA-3]
+ _ = x[Transmeta-4]
+ _ = x[NSC-5]
+ _ = x[KVM-6]
+ _ = x[MSVM-7]
+ _ = x[VMware-8]
+ _ = x[XenHVM-9]
+ _ = x[Bhyve-10]
+ _ = x[Hygon-11]
+ _ = x[SiS-12]
+ _ = x[RDC-13]
+ _ = x[Ampere-14]
+ _ = x[ARM-15]
+ _ = x[Broadcom-16]
+ _ = x[Cavium-17]
+ _ = x[DEC-18]
+ _ = x[Fujitsu-19]
+ _ = x[Infineon-20]
+ _ = x[Motorola-21]
+ _ = x[NVIDIA-22]
+ _ = x[AMCC-23]
+ _ = x[Qualcomm-24]
+ _ = x[Marvell-25]
+ _ = x[lastVendor-26]
+}
+
+const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor"
+
+var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155}
+
+// String returns the name of i, sliced out of _Vendor_name using the offsets
+// in _Vendor_index. Values outside the table are rendered as
+// "Vendor(<decimal value>)".
+func (i Vendor) String() string {
+ if i < 0 || i >= Vendor(len(_Vendor_index)-1) {
+ return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ // Entry i of the index is the start offset of the name, entry i+1 its end.
+ return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]]
+}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
new file mode 100644
index 000000000..d91d02109
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
@@ -0,0 +1,121 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+package cpuid
+
+import (
+ "runtime"
+ "strings"
+
+ "golang.org/x/sys/unix"
+)
+
+// detectOS fills c from the operating system and always returns true.
+// When GOOS is "ios" neither the sysctl probing nor the baseline feature
+// set below is applied.
+func detectOS(c *CPUInfo) bool {
+	notIOS := runtime.GOOS != "ios"
+	if notIOS {
+		tryToFillCPUInfoFomSysctl(c)
+	}
+
+	// Mac OS 11.0 exposes no hw.optional sysctl values for the features
+	// below, so their state cannot be detected dynamically. Treat what the
+	// Apple Silicon M1 supports as the minimal feature set available to all
+	// Go programs running on darwin/arm64.
+	// TODO: Add more if we know them.
+	c.featureSet.setIf(notIOS, AESARM, PMULL, SHA1, SHA2)
+	return true
+}
+
+// sysctlGetBool reads the named sysctl as a uint32 and reports whether it is
+// non-zero. A failed read yields false.
+func sysctlGetBool(name string) bool {
+	v, err := unix.SysctlUint32(name)
+	return err == nil && v != 0
+}
+
+// sysctlGetString returns the string value of the named sysctl, or the empty
+// string when it cannot be read.
+func sysctlGetString(name string) string {
+	if s, err := unix.Sysctl(name); err == nil {
+		return s
+	}
+	return ""
+}
+
+// sysctlGetInt tries each name in order as a uint32 sysctl and returns the
+// first value that is both readable and non-zero. If no name qualifies it
+// returns unknown.
+func sysctlGetInt(unknown int, names ...string) int {
+	for i := range names {
+		if v, err := unix.SysctlUint32(names[i]); err == nil && v != 0 {
+			return int(v)
+		}
+	}
+	return unknown
+}
+
+// sysctlGetInt64 tries each name in order as a uint64 sysctl and returns the
+// first readable value that, converted to int, differs from unknown. If no
+// name qualifies it returns unknown.
+func sysctlGetInt64(unknown int, names ...string) int {
+	for i := range names {
+		raw, err := unix.SysctlUint64(names[i])
+		if err != nil {
+			continue
+		}
+		if n := int(raw); n != unknown {
+			return n
+		}
+	}
+	return unknown
+}
+
+// setFeature reads the named sysctl as a boolean and hands the result to
+// c.featureSet.setIf for feature.
+func setFeature(c *CPUInfo, name string, feature FeatureID) {
+	present := sysctlGetBool(name)
+	c.featureSet.setIf(present, feature)
+}
+// tryToFillCPUInfoFomSysctl populates c from sysctl values: brand and vendor
+// strings, core and thread counts, family/model, cache sizes and the ARM
+// feature flags. A value that cannot be read falls back to the default passed
+// to the corresponding sysctlGet* helper.
+func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
+	c.BrandName = sysctlGetString("machdep.cpu.brand_string")
+
+	// The vendor is the first word of the brand string. strings.Fields returns
+	// an empty slice for an empty or whitespace-only string, so test the slice
+	// rather than the raw string length before indexing.
+	if fields := strings.Fields(c.BrandName); len(fields) > 0 {
+		c.VendorString = fields[0]
+	}
+
+	c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu")
+	// sysctlGetInt only returns a sysctl value when it is non-zero and falls
+	// back to 1 here otherwise, so the divisor is never zero.
+	c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") /
+		sysctlGetInt(1, "hw.physicalcpu")
+	c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count")
+	c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily")
+	c.Model = sysctlGetInt(0, "machdep.cpu.model")
+	c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize")
+	c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize")
+	// The data cache has its own key; it must not reuse the instruction-cache one.
+	c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize")
+	c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
+	c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")
+
+	// from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile
+	setFeature(c, "hw.optional.arm.FEAT_AES", AESARM)
+	setFeature(c, "hw.optional.AdvSIMD", ASIMD)
+	setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP)
+	setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM)
+	// NOTE(review): this is the only FEAT_ key without the ".arm." segment —
+	// confirm the name. CRC32 is also probed via hw.optional.armv8_crc32 below.
+	setFeature(c, "hw.optional.FEAT_CRC32", CRC32)
+	setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP)
+	// setFeature(c, "", EVTSTRM)
+	setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA)
+	setFeature(c, "hw.optional.arm.FEAT_FP", FP)
+	setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP)
+	setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA)
+	setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT)
+	setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC)
+	setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL)
+	setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1)
+	setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2)
+	setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3)
+	setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512)
+	// setFeature(c, "", SM3)
+	// setFeature(c, "", SM4)
+	setFeature(c, "hw.optional.arm.FEAT_SVE", SVE)
+
+	// from empirical observation
+	setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP)
+	setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS)
+	setFeature(c, "hw.optional.floatingpoint", FP)
+	setFeature(c, "hw.optional.armv8_2_sha3", SHA3)
+	setFeature(c, "hw.optional.armv8_2_sha512", SHA512)
+	setFeature(c, "hw.optional.armv8_3_compnum", FCMA)
+	setFeature(c, "hw.optional.armv8_crc32", CRC32)
+}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
new file mode 100644
index 000000000..ee278b9e4
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
@@ -0,0 +1,130 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file located
+// here https://github.com/golang/sys/blob/master/LICENSE
+
+package cpuid
+
+import (
+ "encoding/binary"
+ "io/ioutil"
+ "runtime"
+)
+
+// HWCAP bits.
+const (
+ hwcap_FP = 1 << 0
+ hwcap_ASIMD = 1 << 1
+ hwcap_EVTSTRM = 1 << 2
+ hwcap_AES = 1 << 3
+ hwcap_PMULL = 1 << 4
+ hwcap_SHA1 = 1 << 5
+ hwcap_SHA2 = 1 << 6
+ hwcap_CRC32 = 1 << 7
+ hwcap_ATOMICS = 1 << 8
+ hwcap_FPHP = 1 << 9
+ hwcap_ASIMDHP = 1 << 10
+ hwcap_CPUID = 1 << 11
+ hwcap_ASIMDRDM = 1 << 12
+ hwcap_JSCVT = 1 << 13
+ hwcap_FCMA = 1 << 14
+ hwcap_LRCPC = 1 << 15
+ hwcap_DCPOP = 1 << 16
+ hwcap_SHA3 = 1 << 17
+ hwcap_SM3 = 1 << 18
+ hwcap_SM4 = 1 << 19
+ hwcap_ASIMDDP = 1 << 20
+ hwcap_SHA512 = 1 << 21
+ hwcap_SVE = 1 << 22
+ hwcap_ASIMDFHM = 1 << 23
+)
+
+// detectOS fills c with core counts and ARM feature flags derived from the
+// HWCAP bit mask held in the package-level hwcap variable.
+//
+// If hwcap is zero on entry, the function tries to obtain it by parsing
+// /proc/self/auxv and stores the result in hwcap. It returns false when that
+// file cannot be read or when hwcap is still zero afterwards; in that case
+// only the core counts have been written to c. Otherwise it returns true.
+func detectOS(c *CPUInfo) bool {
+ // For now assuming no hyperthreading is reasonable.
+ c.LogicalCores = runtime.NumCPU()
+ c.PhysicalCores = c.LogicalCores
+ c.ThreadsPerCore = 1
+ if hwcap == 0 {
+ // We did not get values from the runtime.
+ // Try reading /proc/self/auxv
+
+ // From https://github.com/golang/sys
+ const (
+ _AT_HWCAP = 16
+ _AT_HWCAP2 = 26
+
+ // 32 or 64, depending on the width of uint on this platform.
+ uintSize = int(32 << (^uint(0) >> 63))
+ )
+
+ buf, err := ioutil.ReadFile("/proc/self/auxv")
+ if err != nil {
+ // e.g. on android /proc/self/auxv is not accessible, so silently
+ // ignore the error and return false without setting any feature
+ // flags.
+ return false
+ }
+ // The buffer is consumed as consecutive (tag, value) pairs of
+ // uint-sized little-endian words.
+ bo := binary.LittleEndian
+ for len(buf) >= 2*(uintSize/8) {
+ var tag, val uint
+ switch uintSize {
+ case 32:
+ tag = uint(bo.Uint32(buf[0:]))
+ val = uint(bo.Uint32(buf[4:]))
+ buf = buf[8:]
+ case 64:
+ tag = uint(bo.Uint64(buf[0:]))
+ val = uint(bo.Uint64(buf[8:]))
+ buf = buf[16:]
+ }
+ switch tag {
+ case _AT_HWCAP:
+ hwcap = val
+ case _AT_HWCAP2:
+ // Not used
+ }
+ }
+ if hwcap == 0 {
+ return false
+ }
+ }
+
+ // HWCap was populated by the runtime from the auxiliary vector.
+ // Use HWCap information since reading aarch64 system registers
+ // is not supported in user space on older linux kernels.
+ c.featureSet.setIf(isSet(hwcap, hwcap_AES), AESARM)
+ c.featureSet.setIf(isSet(hwcap, hwcap_ASIMD), ASIMD)
+ c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDDP), ASIMDDP)
+ c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDHP), ASIMDHP)
+ c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDRDM), ASIMDRDM)
+ c.featureSet.setIf(isSet(hwcap, hwcap_CPUID), ARMCPUID)
+ c.featureSet.setIf(isSet(hwcap, hwcap_CRC32), CRC32)
+ c.featureSet.setIf(isSet(hwcap, hwcap_DCPOP), DCPOP)
+ c.featureSet.setIf(isSet(hwcap, hwcap_EVTSTRM), EVTSTRM)
+ c.featureSet.setIf(isSet(hwcap, hwcap_FCMA), FCMA)
+ c.featureSet.setIf(isSet(hwcap, hwcap_FP), FP)
+ c.featureSet.setIf(isSet(hwcap, hwcap_FPHP), FPHP)
+ c.featureSet.setIf(isSet(hwcap, hwcap_JSCVT), JSCVT)
+ c.featureSet.setIf(isSet(hwcap, hwcap_LRCPC), LRCPC)
+ c.featureSet.setIf(isSet(hwcap, hwcap_PMULL), PMULL)
+ c.featureSet.setIf(isSet(hwcap, hwcap_SHA1), SHA1)
+ c.featureSet.setIf(isSet(hwcap, hwcap_SHA2), SHA2)
+ c.featureSet.setIf(isSet(hwcap, hwcap_SHA3), SHA3)
+ c.featureSet.setIf(isSet(hwcap, hwcap_SHA512), SHA512)
+ c.featureSet.setIf(isSet(hwcap, hwcap_SM3), SM3)
+ c.featureSet.setIf(isSet(hwcap, hwcap_SM4), SM4)
+ c.featureSet.setIf(isSet(hwcap, hwcap_SVE), SVE)
+
+ // The Samsung S9+ kernel reports support for atomics, but not all cores
+ // actually support them, resulting in SIGILL. See issue #28431.
+ // TODO(elias.naur): Only disable the optimization on bad chipsets on android.
+ c.featureSet.setIf(isSet(hwcap, hwcap_ATOMICS) && runtime.GOOS != "android", ATOMICS)
+
+ return true
+}
+
+// isSet reports whether hwc and value share at least one set bit.
+func isSet(hwc uint, value uint) bool {
+	common := hwc & value
+	return common != 0
+}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
new file mode 100644
index 000000000..8733ba343
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
@@ -0,0 +1,16 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build arm64 && !linux && !darwin
+// +build arm64,!linux,!darwin
+
+package cpuid
+
+import "runtime"
+
+// detectOS records runtime.NumCPU() as both the physical and the logical core
+// count, assumes one thread per core, and returns false.
+func detectOS(c *CPUInfo) bool {
+	cores := runtime.NumCPU()
+	c.PhysicalCores = cores
+	c.LogicalCores = cores
+	// For now assuming 1 thread per core...
+	c.ThreadsPerCore = 1
+	return false
+}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
new file mode 100644
index 000000000..f8f201b5f
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
@@ -0,0 +1,8 @@
+// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build nounsafe
+// +build nounsafe
+
+package cpuid
+
+var hwcap uint
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
new file mode 100644
index 000000000..92af622eb
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
@@ -0,0 +1,11 @@
+// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build !nounsafe
+// +build !nounsafe
+
+package cpuid
+
+import _ "unsafe" // needed for go:linkname
+
+//go:linkname hwcap internal/cpu.HWCap
+var hwcap uint
diff --git a/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
new file mode 100644
index 000000000..471d986d2
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# Build-checks the package for every OS/architecture pair printed by
+# `go tool dist list`, once per combination of the noasm and appengine tags.
+# Stops at the first failing build.
+set -e
+
+# -r keeps read from interpreting backslashes; the expansions are quoted so
+# each value reaches the environment assignment as a single word.
+go tool dist list | while IFS=/ read -r os arch; do
+	echo "Checking $os/$arch..."
+	echo " normal"
+	GOARCH="$arch" GOOS="$os" go build -o /dev/null .
+	echo " noasm"
+	GOARCH="$arch" GOOS="$os" go build -tags noasm -o /dev/null .
+	echo " appengine"
+	GOARCH="$arch" GOOS="$os" go build -tags appengine -o /dev/null .
+	echo " noasm,appengine"
+	GOARCH="$arch" GOOS="$os" go build -tags 'appengine noasm' -o /dev/null .
+done