22 files changed, 2315 insertions, 1830 deletions
diff --git a/vendor/github.com/klauspost/cpuid/.travis.yml b/vendor/github.com/klauspost/cpuid/.travis.yml
deleted file mode 100644
index 77d975fe2..000000000
--- a/vendor/github.com/klauspost/cpuid/.travis.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-language: go
-
-os:
-  - linux
-  - osx
-  - windows
-
-arch:
-  - amd64
-  - arm64
-
-go:
-  - 1.12.x
-  - 1.13.x
-  - 1.14.x
-  - master
-
-script:
-  - go vet ./...
-  - go test -race ./...
-  - go test -tags=noasm ./...
-
-stages:
-  - gofmt
-  - test
-
-matrix:
-  allow_failures:
-    - go: 'master'
-  fast_finish: true
-  include:
-    - stage: gofmt
-      go: 1.14.x
-      os: linux
-      arch: amd64
-      script:
-        - diff <(gofmt -d .) <(printf "")
-        - diff <(gofmt -d ./private) <(printf "")
-        - go install github.com/klauspost/asmfmt/cmd/asmfmt
-        - diff <(asmfmt -d .) <(printf "")
-    - stage: i386
-      go: 1.14.x
-      os: linux
-      arch: amd64
-      script:
-        - GOOS=linux GOARCH=386 go test .
diff --git a/vendor/github.com/klauspost/cpuid/README.md b/vendor/github.com/klauspost/cpuid/README.md
deleted file mode 100644
index 38d4a8b93..000000000
--- a/vendor/github.com/klauspost/cpuid/README.md
+++ /dev/null
@@ -1,191 +0,0 @@
-# cpuid
-Package cpuid provides information about the CPU running the current program.
-
-CPU features are detected on startup, and kept for fast access through the life of the application.
-Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
-
-You can access the CPU information by accessing the shared CPU variable of the cpuid library.
-
-Package home: https://github.com/klauspost/cpuid
-
-[![GoDoc][1]][2] [![Build Status][3]][4]
-
-[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
-[2]: https://godoc.org/github.com/klauspost/cpuid
-[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
-[4]: https://travis-ci.org/klauspost/cpuid
-
-# features
-
-## x86 CPU Instructions
-*  **CMOV** (i686 CMOV)
-*  **NX** (NX (No-Execute) bit)
-*  **AMD3DNOW** (AMD 3DNOW)
-*  **AMD3DNOWEXT** (AMD 3DNowExt)
-*  **MMX** (standard MMX)
-*  **MMXEXT** (SSE integer functions or AMD MMX ext)
-*  **SSE** (SSE functions)
-*  **SSE2** (P4 SSE functions)
-*  **SSE3** (Prescott SSE3 functions)
-*  **SSSE3** (Conroe SSSE3 functions)
-*  **SSE4** (Penryn SSE4.1 functions)
-*  **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
-*  **SSE42** (Nehalem SSE4.2 functions)
-*  **AVX** (AVX functions)
-*  **AVX2** (AVX2 functions)
-*  **FMA3** (Intel FMA 3)
-*  **FMA4** (Bulldozer FMA4 functions)
-*  **XOP** (Bulldozer XOP functions)
-*  **F16C** (Half-precision floating-point conversion)
-*  **BMI1** (Bit Manipulation Instruction Set 1)
-*  **BMI2** (Bit Manipulation Instruction Set 2)
-*  **TBM** (AMD Trailing Bit Manipulation)
-*  **LZCNT** (LZCNT instruction)
-*  **POPCNT** (POPCNT instruction)
-*  **AESNI** (Advanced Encryption Standard New Instructions)
-*  **CLMUL** (Carry-less Multiplication)
-*  **HTT** (Hyperthreading (enabled))
-*  **HLE** (Hardware Lock Elision)
-*  **RTM** (Restricted Transactional Memory)
-*  **RDRAND** (RDRAND instruction is available)
-*  **RDSEED** (RDSEED instruction is available)
-*  **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
-*  **SHA** (Intel SHA Extensions)
-*  **AVX512F** (AVX-512 Foundation)
-*  **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
-*  **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
-*  **AVX512PF** (AVX-512 Prefetch Instructions)
-*  **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
-*  **AVX512CD** (AVX-512 Conflict Detection Instructions)
-*  **AVX512BW** (AVX-512 Byte and Word Instructions)
-*  **AVX512VL** (AVX-512 Vector Length Extensions)
-*  **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
-*  **AVX512VBMI2** (AVX-512 Vector Bit Manipulation Instructions, Version 2)
-*  **AVX512VNNI** (AVX-512 Vector Neural Network Instructions)
-*  **AVX512VPOPCNTDQ** (AVX-512 Vector Population Count Doubleword and Quadword)
-*  **GFNI** (Galois Field New Instructions)
-*  **VAES** (Vector AES)
-*  **AVX512BITALG** (AVX-512 Bit Algorithms)
-*  **VPCLMULQDQ** (Carry-Less Multiplication Quadword)
-*  **AVX512BF16** (AVX-512 BFLOAT16 Instructions)
-*  **AVX512VP2INTERSECT** (AVX-512 Intersect for D/Q)
-*  **MPX** (Intel MPX (Memory Protection Extensions))
-*  **ERMS** (Enhanced REP MOVSB/STOSB)
-*  **RDTSCP** (RDTSCP Instruction)
-*  **CX16** (CMPXCHG16B Instruction)
-*  **SGX** (Software Guard Extensions, with activation details)
-*  **VMX** (Virtual Machine Extensions)
-
-## Performance
-*  **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
-*  **SSE2SLOW** (SSE2 is supported, but usually not faster)
-*  **SSE3SLOW** (SSE3 is supported, but usually not faster)
-*  **ATOM** (Atom processor, some SSSE3 instructions are slower)
-*  **Cache line** (Probable size of a cache line).
-*  **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
-
-## ARM CPU features
-
-# ARM FEATURE DETECTION DISABLED!
-
-See [#52](https://github.com/klauspost/cpuid/issues/52).
- 
-Currently only `arm64` platforms are implemented. 
-
-*  **FP**  Single-precision and double-precision floating point
-*  **ASIMD**  Advanced SIMD
-*  **EVTSTRM**  Generic timer
-*  **AES**  AES instructions
-*  **PMULL**  Polynomial Multiply instructions (PMULL/PMULL2)
-*  **SHA1**  SHA-1 instructions (SHA1C, etc)
-*  **SHA2**      SHA-2 instructions (SHA256H, etc)
-*  **CRC32**   CRC32/CRC32C instructions
-*  **ATOMICS**   Large System Extensions (LSE)
-*  **FPHP** Half-precision floating point
-*  **ASIMDHP**  Advanced SIMD half-precision floating point
-*  **ARMCPUID**  Some CPU ID registers readable at user-level
-*  **ASIMDRDM**  Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
-*  **JSCVT** Javascript-style double->int convert (FJCVTZS)
-*  **FCMA**  Floating point complex number addition and multiplication
-*  **LRCPC**  Weaker release consistency (LDAPR, etc)
-*  **DCPOP**  Data cache clean to Point of Persistence (DC CVAP)
-*  **SHA3**  SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
-*  **SM3** SM3 instructions
-*  **SM4**  SM4 instructions
-*  **ASIMDDP**  SIMD Dot Product
-*  **SHA512**  SHA512 instructions
-*  **SVE** Scalable Vector Extension
-*  **GPA**  Generic Pointer Authentication
-
-## Cpu Vendor/VM
-* **Intel**
-* **AMD**
-* **VIA**
-* **Transmeta**
-* **NSC**
-* **KVM**  (Kernel-based Virtual Machine)
-* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
-* **VMware**
-* **XenHVM**
-* **Bhyve**
-* **Hygon**
-
-# installing
-
-```go get github.com/klauspost/cpuid```
-
-# example
-
-```Go
-package main
-
-import (
-	"fmt"
-	"github.com/klauspost/cpuid"
-)
-
-func main() {
-	// Print basic CPU information:
-	fmt.Println("Name:", cpuid.CPU.BrandName)
-	fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
-	fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
-	fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
-	fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
-	fmt.Println("Features:", cpuid.CPU.Features)
-	fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
-	fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
-	fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
-	fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
-	fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
-
-	// Test if we have a specific feature:
-	if cpuid.CPU.SSE() {
-		fmt.Println("We have Streaming SIMD Extensions")
-	}
-}
-```
-
-Sample output:
-```
->go run main.go
-Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
-PhysicalCores: 2
-ThreadsPerCore: 2
-LogicalCores: 4
-Family 6 Model: 42
-Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
-Cacheline bytes: 64
-We have Streaming SIMD Extensions
-```
-
-# private package
-
-In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
-
-For this purpose all exports are removed, and functions and constants are lowercased.
-
-This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
-
-# license
-
-This code is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/cpuid/cpuid.go b/vendor/github.com/klauspost/cpuid/cpuid.go
deleted file mode 100644
index 208b3e79b..000000000
--- a/vendor/github.com/klauspost/cpuid/cpuid.go
+++ /dev/null
@@ -1,1504 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-// Package cpuid provides information about the CPU running the current program.
-//
-// CPU features are detected on startup, and kept for fast access through the life of the application.
-// Currently x86 / x64 (AMD64) as well as arm64 is supported.
-//
-// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
-//
-// Package home: https://github.com/klauspost/cpuid
-package cpuid
-
-import (
-	"math"
-	"strings"
-)
-
-// AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf
-// and Processor Programming Reference (PPR)
-
-// Vendor is a representation of a CPU vendor.
-type Vendor int
-
-const (
-	Other Vendor = iota
-	Intel
-	AMD
-	VIA
-	Transmeta
-	NSC
-	KVM  // Kernel-based Virtual Machine
-	MSVM // Microsoft Hyper-V or Windows Virtual PC
-	VMware
-	XenHVM
-	Bhyve
-	Hygon
-	SiS
-	RDC
-)
-
-const (
-	CMOV               = 1 << iota // i686 CMOV
-	NX                             // NX (No-Execute) bit
-	AMD3DNOW                       // AMD 3DNOW
-	AMD3DNOWEXT                    // AMD 3DNowExt
-	MMX                            // standard MMX
-	MMXEXT                         // SSE integer functions or AMD MMX ext
-	SSE                            // SSE functions
-	SSE2                           // P4 SSE functions
-	SSE3                           // Prescott SSE3 functions
-	SSSE3                          // Conroe SSSE3 functions
-	SSE4                           // Penryn SSE4.1 functions
-	SSE4A                          // AMD Barcelona microarchitecture SSE4a instructions
-	SSE42                          // Nehalem SSE4.2 functions
-	AVX                            // AVX functions
-	AVX2                           // AVX2 functions
-	FMA3                           // Intel FMA 3
-	FMA4                           // Bulldozer FMA4 functions
-	XOP                            // Bulldozer XOP functions
-	F16C                           // Half-precision floating-point conversion
-	BMI1                           // Bit Manipulation Instruction Set 1
-	BMI2                           // Bit Manipulation Instruction Set 2
-	TBM                            // AMD Trailing Bit Manipulation
-	LZCNT                          // LZCNT instruction
-	POPCNT                         // POPCNT instruction
-	AESNI                          // Advanced Encryption Standard New Instructions
-	CLMUL                          // Carry-less Multiplication
-	HTT                            // Hyperthreading (enabled)
-	HLE                            // Hardware Lock Elision
-	RTM                            // Restricted Transactional Memory
-	RDRAND                         // RDRAND instruction is available
-	RDSEED                         // RDSEED instruction is available
-	ADX                            // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
-	SHA                            // Intel SHA Extensions
-	AVX512F                        // AVX-512 Foundation
-	AVX512DQ                       // AVX-512 Doubleword and Quadword Instructions
-	AVX512IFMA                     // AVX-512 Integer Fused Multiply-Add Instructions
-	AVX512PF                       // AVX-512 Prefetch Instructions
-	AVX512ER                       // AVX-512 Exponential and Reciprocal Instructions
-	AVX512CD                       // AVX-512 Conflict Detection Instructions
-	AVX512BW                       // AVX-512 Byte and Word Instructions
-	AVX512VL                       // AVX-512 Vector Length Extensions
-	AVX512VBMI                     // AVX-512 Vector Bit Manipulation Instructions
-	AVX512VBMI2                    // AVX-512 Vector Bit Manipulation Instructions, Version 2
-	AVX512VNNI                     // AVX-512 Vector Neural Network Instructions
-	AVX512VPOPCNTDQ                // AVX-512 Vector Population Count Doubleword and Quadword
-	GFNI                           // Galois Field New Instructions
-	VAES                           // Vector AES
-	AVX512BITALG                   // AVX-512 Bit Algorithms
-	VPCLMULQDQ                     // Carry-Less Multiplication Quadword
-	AVX512BF16                     // AVX-512 BFLOAT16 Instructions
-	AVX512VP2INTERSECT             // AVX-512 Intersect for D/Q
-	MPX                            // Intel MPX (Memory Protection Extensions)
-	ERMS                           // Enhanced REP MOVSB/STOSB
-	RDTSCP                         // RDTSCP Instruction
-	CX16                           // CMPXCHG16B Instruction
-	SGX                            // Software Guard Extensions
-	SGXLC                          // Software Guard Extensions Launch Control
-	IBPB                           // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
-	STIBP                          // Single Thread Indirect Branch Predictors
-	VMX                            // Virtual Machine Extensions
-
-	// Performance indicators
-	SSE2SLOW // SSE2 is supported, but usually not faster
-	SSE3SLOW // SSE3 is supported, but usually not faster
-	ATOM     // Atom processor, some SSSE3 instructions are slower
-)
-
-var flagNames = map[Flags]string{
-	CMOV:               "CMOV",               // i686 CMOV
-	NX:                 "NX",                 // NX (No-Execute) bit
-	AMD3DNOW:           "AMD3DNOW",           // AMD 3DNOW
-	AMD3DNOWEXT:        "AMD3DNOWEXT",        // AMD 3DNowExt
-	MMX:                "MMX",                // Standard MMX
-	MMXEXT:             "MMXEXT",             // SSE integer functions or AMD MMX ext
-	SSE:                "SSE",                // SSE functions
-	SSE2:               "SSE2",               // P4 SSE2 functions
-	SSE3:               "SSE3",               // Prescott SSE3 functions
-	SSSE3:              "SSSE3",              // Conroe SSSE3 functions
-	SSE4:               "SSE4.1",             // Penryn SSE4.1 functions
-	SSE4A:              "SSE4A",              // AMD Barcelona microarchitecture SSE4a instructions
-	SSE42:              "SSE4.2",             // Nehalem SSE4.2 functions
-	AVX:                "AVX",                // AVX functions
-	AVX2:               "AVX2",               // AVX functions
-	FMA3:               "FMA3",               // Intel FMA 3
-	FMA4:               "FMA4",               // Bulldozer FMA4 functions
-	XOP:                "XOP",                // Bulldozer XOP functions
-	F16C:               "F16C",               // Half-precision floating-point conversion
-	BMI1:               "BMI1",               // Bit Manipulation Instruction Set 1
-	BMI2:               "BMI2",               // Bit Manipulation Instruction Set 2
-	TBM:                "TBM",                // AMD Trailing Bit Manipulation
-	LZCNT:              "LZCNT",              // LZCNT instruction
-	POPCNT:             "POPCNT",             // POPCNT instruction
-	AESNI:              "AESNI",              // Advanced Encryption Standard New Instructions
-	CLMUL:              "CLMUL",              // Carry-less Multiplication
-	HTT:                "HTT",                // Hyperthreading (enabled)
-	HLE:                "HLE",                // Hardware Lock Elision
-	RTM:                "RTM",                // Restricted Transactional Memory
-	RDRAND:             "RDRAND",             // RDRAND instruction is available
-	RDSEED:             "RDSEED",             // RDSEED instruction is available
-	ADX:                "ADX",                // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
-	SHA:                "SHA",                // Intel SHA Extensions
-	AVX512F:            "AVX512F",            // AVX-512 Foundation
-	AVX512DQ:           "AVX512DQ",           // AVX-512 Doubleword and Quadword Instructions
-	AVX512IFMA:         "AVX512IFMA",         // AVX-512 Integer Fused Multiply-Add Instructions
-	AVX512PF:           "AVX512PF",           // AVX-512 Prefetch Instructions
-	AVX512ER:           "AVX512ER",           // AVX-512 Exponential and Reciprocal Instructions
-	AVX512CD:           "AVX512CD",           // AVX-512 Conflict Detection Instructions
-	AVX512BW:           "AVX512BW",           // AVX-512 Byte and Word Instructions
-	AVX512VL:           "AVX512VL",           // AVX-512 Vector Length Extensions
-	AVX512VBMI:         "AVX512VBMI",         // AVX-512 Vector Bit Manipulation Instructions
-	AVX512VBMI2:        "AVX512VBMI2",        // AVX-512 Vector Bit Manipulation Instructions, Version 2
-	AVX512VNNI:         "AVX512VNNI",         // AVX-512 Vector Neural Network Instructions
-	AVX512VPOPCNTDQ:    "AVX512VPOPCNTDQ",    // AVX-512 Vector Population Count Doubleword and Quadword
-	GFNI:               "GFNI",               // Galois Field New Instructions
-	VAES:               "VAES",               // Vector AES
-	AVX512BITALG:       "AVX512BITALG",       // AVX-512 Bit Algorithms
-	VPCLMULQDQ:         "VPCLMULQDQ",         // Carry-Less Multiplication Quadword
-	AVX512BF16:         "AVX512BF16",         // AVX-512 BFLOAT16 Instruction
-	AVX512VP2INTERSECT: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q
-	MPX:                "MPX",                // Intel MPX (Memory Protection Extensions)
-	ERMS:               "ERMS",               // Enhanced REP MOVSB/STOSB
-	RDTSCP:             "RDTSCP",             // RDTSCP Instruction
-	CX16:               "CX16",               // CMPXCHG16B Instruction
-	SGX:                "SGX",                // Software Guard Extensions
-	SGXLC:              "SGXLC",              // Software Guard Extensions Launch Control
-	IBPB:               "IBPB",               // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
-	STIBP:              "STIBP",              // Single Thread Indirect Branch Predictors
-	VMX:                "VMX",                // Virtual Machine Extensions
-
-	// Performance indicators
-	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
-	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
-	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower
-
-}
-
-/* all special features for arm64 should be defined here */
-const (
-	/* extension instructions */
-	FP ArmFlags = 1 << iota
-	ASIMD
-	EVTSTRM
-	AES
-	PMULL
-	SHA1
-	SHA2
-	CRC32
-	ATOMICS
-	FPHP
-	ASIMDHP
-	ARMCPUID
-	ASIMDRDM
-	JSCVT
-	FCMA
-	LRCPC
-	DCPOP
-	SHA3
-	SM3
-	SM4
-	ASIMDDP
-	SHA512
-	SVE
-	GPA
-)
-
-var flagNamesArm = map[ArmFlags]string{
-	FP:       "FP",       // Single-precision and double-precision floating point
-	ASIMD:    "ASIMD",    // Advanced SIMD
-	EVTSTRM:  "EVTSTRM",  // Generic timer
-	AES:      "AES",      // AES instructions
-	PMULL:    "PMULL",    // Polynomial Multiply instructions (PMULL/PMULL2)
-	SHA1:     "SHA1",     // SHA-1 instructions (SHA1C, etc)
-	SHA2:     "SHA2",     // SHA-2 instructions (SHA256H, etc)
-	CRC32:    "CRC32",    // CRC32/CRC32C instructions
-	ATOMICS:  "ATOMICS",  // Large System Extensions (LSE)
-	FPHP:     "FPHP",     // Half-precision floating point
-	ASIMDHP:  "ASIMDHP",  // Advanced SIMD half-precision floating point
-	ARMCPUID: "CPUID",    // Some CPU ID registers readable at user-level
-	ASIMDRDM: "ASIMDRDM", // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
-	JSCVT:    "JSCVT",    // Javascript-style double->int convert (FJCVTZS)
-	FCMA:     "FCMA",     // Floatin point complex number addition and multiplication
-	LRCPC:    "LRCPC",    // Weaker release consistency (LDAPR, etc)
-	DCPOP:    "DCPOP",    // Data cache clean to Point of Persistence (DC CVAP)
-	SHA3:     "SHA3",     // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
-	SM3:      "SM3",      // SM3 instructions
-	SM4:      "SM4",      // SM4 instructions
-	ASIMDDP:  "ASIMDDP",  // SIMD Dot Product
-	SHA512:   "SHA512",   // SHA512 instructions
-	SVE:      "SVE",      // Scalable Vector Extension
-	GPA:      "GPA",      // Generic Pointer Authentication
-}
-
-// CPUInfo contains information about the detected system CPU.
-type CPUInfo struct {
-	BrandName      string   // Brand name reported by the CPU
-	VendorID       Vendor   // Comparable CPU vendor ID
-	VendorString   string   // Raw vendor string.
-	Features       Flags    // Features of the CPU (x64)
-	Arm            ArmFlags // Features of the CPU (arm)
-	PhysicalCores  int      // Number of physical processor cores in your CPU. Will be 0 if undetectable.
-	ThreadsPerCore int      // Number of threads per physical core. Will be 1 if undetectable.
-	LogicalCores   int      // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
-	Family         int      // CPU family number
-	Model          int      // CPU model number
-	CacheLine      int      // Cache line size in bytes. Will be 0 if undetectable.
-	Hz             int64    // Clock speed, if known
-	Cache          struct {
-		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
-		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
-		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
-		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
-	}
-	SGX       SGXSupport
-	maxFunc   uint32
-	maxExFunc uint32
-}
-
-var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
-var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-var xgetbv func(index uint32) (eax, edx uint32)
-var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
-
-// CPU contains information about the CPU as detected on startup,
-// or when Detect last was called.
-//
-// Use this as the primary entry point to you data.
-var CPU CPUInfo
-
-func init() {
-	initCPU()
-	Detect()
-}
-
-// Detect will re-detect current CPU info.
-// This will replace the content of the exported CPU variable.
-//
-// Unless you expect the CPU to change while you are running your program
-// you should not need to call this function.
-// If you call this, you must ensure that no other goroutine is accessing the
-// exported CPU variable.
-func Detect() {
-	// Set defaults
-	CPU.ThreadsPerCore = 1
-	CPU.Cache.L1I = -1
-	CPU.Cache.L1D = -1
-	CPU.Cache.L2 = -1
-	CPU.Cache.L3 = -1
-	addInfo(&CPU)
-}
-
-// Generated here: http://play.golang.org/p/BxFH2Gdc0G
-
-// Cmov indicates support of CMOV instructions
-func (c CPUInfo) Cmov() bool {
-	return c.Features&CMOV != 0
-}
-
-// Amd3dnow indicates support of AMD 3DNOW! instructions
-func (c CPUInfo) Amd3dnow() bool {
-	return c.Features&AMD3DNOW != 0
-}
-
-// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
-func (c CPUInfo) Amd3dnowExt() bool {
-	return c.Features&AMD3DNOWEXT != 0
-}
-
-// VMX indicates support of VMX
-func (c CPUInfo) VMX() bool {
-	return c.Features&VMX != 0
-}
-
-// MMX indicates support of MMX instructions
-func (c CPUInfo) MMX() bool {
-	return c.Features&MMX != 0
-}
-
-// MMXExt indicates support of MMXEXT instructions
-// (SSE integer functions or AMD MMX ext)
-func (c CPUInfo) MMXExt() bool {
-	return c.Features&MMXEXT != 0
-}
-
-// SSE indicates support of SSE instructions
-func (c CPUInfo) SSE() bool {
-	return c.Features&SSE != 0
-}
-
-// SSE2 indicates support of SSE 2 instructions
-func (c CPUInfo) SSE2() bool {
-	return c.Features&SSE2 != 0
-}
-
-// SSE3 indicates support of SSE 3 instructions
-func (c CPUInfo) SSE3() bool {
-	return c.Features&SSE3 != 0
-}
-
-// SSSE3 indicates support of SSSE 3 instructions
-func (c CPUInfo) SSSE3() bool {
-	return c.Features&SSSE3 != 0
-}
-
-// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
-func (c CPUInfo) SSE4() bool {
-	return c.Features&SSE4 != 0
-}
-
-// SSE42 indicates support of SSE4.2 instructions
-func (c CPUInfo) SSE42() bool {
-	return c.Features&SSE42 != 0
-}
-
-// AVX indicates support of AVX instructions
-// and operating system support of AVX instructions
-func (c CPUInfo) AVX() bool {
-	return c.Features&AVX != 0
-}
-
-// AVX2 indicates support of AVX2 instructions
-func (c CPUInfo) AVX2() bool {
-	return c.Features&AVX2 != 0
-}
-
-// FMA3 indicates support of FMA3 instructions
-func (c CPUInfo) FMA3() bool {
-	return c.Features&FMA3 != 0
-}
-
-// FMA4 indicates support of FMA4 instructions
-func (c CPUInfo) FMA4() bool {
-	return c.Features&FMA4 != 0
-}
-
-// XOP indicates support of XOP instructions
-func (c CPUInfo) XOP() bool {
-	return c.Features&XOP != 0
-}
-
-// F16C indicates support of F16C instructions
-func (c CPUInfo) F16C() bool {
-	return c.Features&F16C != 0
-}
-
-// BMI1 indicates support of BMI1 instructions
-func (c CPUInfo) BMI1() bool {
-	return c.Features&BMI1 != 0
-}
-
-// BMI2 indicates support of BMI2 instructions
-func (c CPUInfo) BMI2() bool {
-	return c.Features&BMI2 != 0
-}
-
-// TBM indicates support of TBM instructions
-// (AMD Trailing Bit Manipulation)
-func (c CPUInfo) TBM() bool {
-	return c.Features&TBM != 0
-}
-
-// Lzcnt indicates support of LZCNT instruction
-func (c CPUInfo) Lzcnt() bool {
-	return c.Features&LZCNT != 0
-}
-
-// Popcnt indicates support of POPCNT instruction
-func (c CPUInfo) Popcnt() bool {
-	return c.Features&POPCNT != 0
-}
-
-// HTT indicates the processor has Hyperthreading enabled
-func (c CPUInfo) HTT() bool {
-	return c.Features&HTT != 0
-}
-
-// SSE2Slow indicates that SSE2 may be slow on this processor
-func (c CPUInfo) SSE2Slow() bool {
-	return c.Features&SSE2SLOW != 0
-}
-
-// SSE3Slow indicates that SSE3 may be slow on this processor
-func (c CPUInfo) SSE3Slow() bool {
-	return c.Features&SSE3SLOW != 0
-}
-
-// AesNi indicates support of AES-NI instructions
-// (Advanced Encryption Standard New Instructions)
-func (c CPUInfo) AesNi() bool {
-	return c.Features&AESNI != 0
-}
-
-// Clmul indicates support of CLMUL instructions
-// (Carry-less Multiplication)
-func (c CPUInfo) Clmul() bool {
-	return c.Features&CLMUL != 0
-}
-
-// NX indicates support of NX (No-Execute) bit
-func (c CPUInfo) NX() bool {
-	return c.Features&NX != 0
-}
-
-// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
-func (c CPUInfo) SSE4A() bool {
-	return c.Features&SSE4A != 0
-}
-
-// HLE indicates support of Hardware Lock Elision
-func (c CPUInfo) HLE() bool {
-	return c.Features&HLE != 0
-}
-
-// RTM indicates support of Restricted Transactional Memory
-func (c CPUInfo) RTM() bool {
-	return c.Features&RTM != 0
-}
-
-// Rdrand indicates support of RDRAND instruction is available
-func (c CPUInfo) Rdrand() bool {
-	return c.Features&RDRAND != 0
-}
-
-// Rdseed indicates support of RDSEED instruction is available
-func (c CPUInfo) Rdseed() bool {
-	return c.Features&RDSEED != 0
-}
-
-// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
-func (c CPUInfo) ADX() bool {
-	return c.Features&ADX != 0
-}
-
-// SHA indicates support of Intel SHA Extensions
-func (c CPUInfo) SHA() bool {
-	return c.Features&SHA != 0
-}
-
-// AVX512F indicates support of AVX-512 Foundation
-func (c CPUInfo) AVX512F() bool {
-	return c.Features&AVX512F != 0
-}
-
-// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
-func (c CPUInfo) AVX512DQ() bool {
-	return c.Features&AVX512DQ != 0
-}
-
-// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
-func (c CPUInfo) AVX512IFMA() bool {
-	return c.Features&AVX512IFMA != 0
-}
-
-// AVX512PF indicates support of AVX-512 Prefetch Instructions
-func (c CPUInfo) AVX512PF() bool {
-	return c.Features&AVX512PF != 0
-}
-
-// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
-func (c CPUInfo) AVX512ER() bool {
-	return c.Features&AVX512ER != 0
-}
-
-// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
-func (c CPUInfo) AVX512CD() bool {
-	return c.Features&AVX512CD != 0
-}
-
-// AVX512BW indicates support of AVX-512 Byte and Word Instructions
-func (c CPUInfo) AVX512BW() bool {
-	return c.Features&AVX512BW != 0
-}
-
-// AVX512VL indicates support of AVX-512 Vector Length Extensions
-func (c CPUInfo) AVX512VL() bool {
-	return c.Features&AVX512VL != 0
-}
-
-// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
-func (c CPUInfo) AVX512VBMI() bool {
-	return c.Features&AVX512VBMI != 0
-}
-
-// AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2
-func (c CPUInfo) AVX512VBMI2() bool {
-	return c.Features&AVX512VBMI2 != 0
-}
-
-// AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions
-func (c CPUInfo) AVX512VNNI() bool {
-	return c.Features&AVX512VNNI != 0
-}
-
-// AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword
-func (c CPUInfo) AVX512VPOPCNTDQ() bool {
-	return c.Features&AVX512VPOPCNTDQ != 0
-}
-
-// GFNI indicates support of Galois Field New Instructions
-func (c CPUInfo) GFNI() bool {
-	return c.Features&GFNI != 0
-}
-
-// VAES indicates support of Vector AES
-func (c CPUInfo) VAES() bool {
-	return c.Features&VAES != 0
-}
-
-// AVX512BITALG indicates support of AVX-512 Bit Algorithms
-func (c CPUInfo) AVX512BITALG() bool {
-	return c.Features&AVX512BITALG != 0
-}
-
-// VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword
-func (c CPUInfo) VPCLMULQDQ() bool {
-	return c.Features&VPCLMULQDQ != 0
-}
-
-// AVX512BF16 indicates support of
-func (c CPUInfo) AVX512BF16() bool {
-	return c.Features&AVX512BF16 != 0
-}
-
-// AVX512VP2INTERSECT indicates support of
-func (c CPUInfo) AVX512VP2INTERSECT() bool {
-	return c.Features&AVX512VP2INTERSECT != 0
-}
-
-// MPX indicates support of Intel MPX (Memory Protection Extensions)
-func (c CPUInfo) MPX() bool {
-	return c.Features&MPX != 0
-}
-
-// ERMS indicates support of Enhanced REP MOVSB/STOSB
-func (c CPUInfo) ERMS() bool {
-	return c.Features&ERMS != 0
-}
-
-// RDTSCP Instruction is available.
-func (c CPUInfo) RDTSCP() bool {
-	return c.Features&RDTSCP != 0
-}
-
-// CX16 indicates if CMPXCHG16B instruction is available.
-func (c CPUInfo) CX16() bool {
-	return c.Features&CX16 != 0
-}
-
-// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
-// So TSX simply checks that.
-func (c CPUInfo) TSX() bool {
-	return c.Features&(HLE|RTM) == HLE|RTM
-}
-
-// Atom indicates an Atom processor
-func (c CPUInfo) Atom() bool {
-	return c.Features&ATOM != 0
-}
-
-// Intel returns true if vendor is recognized as Intel
-func (c CPUInfo) Intel() bool {
-	return c.VendorID == Intel
-}
-
-// AMD returns true if vendor is recognized as AMD
-func (c CPUInfo) AMD() bool {
-	return c.VendorID == AMD
-}
-
-// Hygon returns true if vendor is recognized as Hygon
-func (c CPUInfo) Hygon() bool {
-	return c.VendorID == Hygon
-}
-
-// Transmeta returns true if vendor is recognized as Transmeta
-func (c CPUInfo) Transmeta() bool {
-	return c.VendorID == Transmeta
-}
-
-// NSC returns true if vendor is recognized as National Semiconductor
-func (c CPUInfo) NSC() bool {
-	return c.VendorID == NSC
-}
-
-// VIA returns true if vendor is recognized as VIA
-func (c CPUInfo) VIA() bool {
-	return c.VendorID == VIA
-}
-
-// RTCounter returns the 64-bit time-stamp counter
-// Uses the RDTSCP instruction. The value 0 is returned
-// if the CPU does not support the instruction.
-func (c CPUInfo) RTCounter() uint64 {
-	if !c.RDTSCP() {
-		return 0
-	}
-	a, _, _, d := rdtscpAsm()
-	return uint64(a) | (uint64(d) << 32)
-}
-
-// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
-// This variable is OS dependent, but on Linux contains information
-// about the current cpu/core the code is running on.
-// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
-func (c CPUInfo) Ia32TscAux() uint32 {
-	if !c.RDTSCP() {
-		return 0
-	}
-	_, _, ecx, _ := rdtscpAsm()
-	return ecx
-}
-
-// LogicalCPU will return the Logical CPU the code is currently executing on.
-// This is likely to change when the OS re-schedules the running thread
-// to another CPU.
-// If the current core cannot be detected, -1 will be returned.
-func (c CPUInfo) LogicalCPU() int {
-	if c.maxFunc < 1 {
-		return -1
-	}
-	_, ebx, _, _ := cpuid(1)
-	return int(ebx >> 24)
-}
-
-// hertz tries to compute the clock speed of the CPU. If leaf 15 is
-// supported, use it, otherwise parse the brand string. Yes, really.
-func hertz(model string) int64 {
-	mfi := maxFunctionID()
-	if mfi >= 0x15 {
-		eax, ebx, ecx, _ := cpuid(0x15)
-		if eax != 0 && ebx != 0 && ecx != 0 {
-			return int64((int64(ecx) * int64(ebx)) / int64(eax))
-		}
-	}
-	// computeHz determines the official rated speed of a CPU from its brand
-	// string. This insanity is *actually the official documented way to do
-	// this according to Intel*, prior to leaf 0x15 existing. The official
-	// documentation only shows this working for exactly `x.xx` or `xxxx`
-	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
-	// sizes.
-	hz := strings.LastIndex(model, "Hz")
-	if hz < 3 {
-		return -1
-	}
-	var multiplier int64
-	switch model[hz-1] {
-	case 'M':
-		multiplier = 1000 * 1000
-	case 'G':
-		multiplier = 1000 * 1000 * 1000
-	case 'T':
-		multiplier = 1000 * 1000 * 1000 * 1000
-	}
-	if multiplier == 0 {
-		return -1
-	}
-	freq := int64(0)
-	divisor := int64(0)
-	decimalShift := int64(1)
-	var i int
-	for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
-		if model[i] >= '0' && model[i] <= '9' {
-			freq += int64(model[i]-'0') * decimalShift
-			decimalShift *= 10
-		} else if model[i] == '.' {
-			if divisor != 0 {
-				return -1
-			}
-			divisor = decimalShift
-		} else {
-			return -1
-		}
-	}
-	// we didn't find a space
-	if i < 0 {
-		return -1
-	}
-	if divisor != 0 {
-		return (freq * multiplier) / divisor
-	}
-	return freq * multiplier
-}
-
-// VM Will return true if the cpu id indicates we are in
-// a virtual machine. This is only a hint, and will very likely
-// have many false negatives.
-func (c CPUInfo) VM() bool {
-	switch c.VendorID {
-	case MSVM, KVM, VMware, XenHVM, Bhyve:
-		return true
-	}
-	return false
-}
-
-// Flags contains detected cpu features and characteristics
-type Flags uint64
-
-// ArmFlags contains detected ARM cpu features and characteristics
-type ArmFlags uint64
-
-// String returns a string representation of the detected
-// CPU features.
-func (f Flags) String() string {
-	return strings.Join(f.Strings(), ",")
-}
-
-// Strings returns an array of the detected features.
-func (f Flags) Strings() []string {
-	r := make([]string, 0, 20)
-	for i := uint(0); i < 64; i++ {
-		key := Flags(1 << i)
-		val := flagNames[key]
-		if f&key != 0 {
-			r = append(r, val)
-		}
-	}
-	return r
-}
-
-// String returns a string representation of the detected
-// CPU features.
-func (f ArmFlags) String() string {
-	return strings.Join(f.Strings(), ",")
-}
-
-// Strings returns an array of the detected features.
-func (f ArmFlags) Strings() []string {
-	r := make([]string, 0, 20)
-	for i := uint(0); i < 64; i++ {
-		key := ArmFlags(1 << i)
-		val := flagNamesArm[key]
-		if f&key != 0 {
-			r = append(r, val)
-		}
-	}
-	return r
-}
-func maxExtendedFunction() uint32 {
-	eax, _, _, _ := cpuid(0x80000000)
-	return eax
-}
-
-func maxFunctionID() uint32 {
-	a, _, _, _ := cpuid(0)
-	return a
-}
-
-func brandName() string {
-	if maxExtendedFunction() >= 0x80000004 {
-		v := make([]uint32, 0, 48)
-		for i := uint32(0); i < 3; i++ {
-			a, b, c, d := cpuid(0x80000002 + i)
-			v = append(v, a, b, c, d)
-		}
-		return strings.Trim(string(valAsString(v...)), " ")
-	}
-	return "unknown"
-}
-
-func threadsPerCore() int {
-	mfi := maxFunctionID()
-	vend, _ := vendorID()
-
-	if mfi < 0x4 || (vend != Intel && vend != AMD) {
-		return 1
-	}
-
-	if mfi < 0xb {
-		if vend != Intel {
-			return 1
-		}
-		_, b, _, d := cpuid(1)
-		if (d & (1 << 28)) != 0 {
-			// v will contain logical core count
-			v := (b >> 16) & 255
-			if v > 1 {
-				a4, _, _, _ := cpuid(4)
-				// physical cores
-				v2 := (a4 >> 26) + 1
-				if v2 > 0 {
-					return int(v) / int(v2)
-				}
-			}
-		}
-		return 1
-	}
-	_, b, _, _ := cpuidex(0xb, 0)
-	if b&0xffff == 0 {
-		return 1
-	}
-	return int(b & 0xffff)
-}
-
-func logicalCores() int {
-	mfi := maxFunctionID()
-	v, _ := vendorID()
-	switch v {
-	case Intel:
-		// Use this on old Intel processors
-		if mfi < 0xb {
-			if mfi < 1 {
-				return 0
-			}
-			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
-			// that can be assigned to logical processors in a physical package.
-			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
-			_, ebx, _, _ := cpuid(1)
-			logical := (ebx >> 16) & 0xff
-			return int(logical)
-		}
-		_, b, _, _ := cpuidex(0xb, 1)
-		return int(b & 0xffff)
-	case AMD, Hygon:
-		_, b, _, _ := cpuid(1)
-		return int((b >> 16) & 0xff)
-	default:
-		return 0
-	}
-}
-
-func familyModel() (int, int) {
-	if maxFunctionID() < 0x1 {
-		return 0, 0
-	}
-	eax, _, _, _ := cpuid(1)
-	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
-	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
-	return int(family), int(model)
-}
-
-func physicalCores() int {
-	v, _ := vendorID()
-	switch v {
-	case Intel:
-		return logicalCores() / threadsPerCore()
-	case AMD, Hygon:
-		lc := logicalCores()
-		tpc := threadsPerCore()
-		if lc > 0 && tpc > 0 {
-			return lc / tpc
-		}
-		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
-
-		if maxExtendedFunction() >= 0x80000008 {
-			_, _, c, _ := cpuid(0x80000008)
-			return int(c&0xff) + 1
-		}
-	}
-	return 0
-}
-
-// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
-var vendorMapping = map[string]Vendor{
-	"AMDisbetter!": AMD,
-	"AuthenticAMD": AMD,
-	"CentaurHauls": VIA,
-	"GenuineIntel": Intel,
-	"TransmetaCPU": Transmeta,
-	"GenuineTMx86": Transmeta,
-	"Geode by NSC": NSC,
-	"VIA VIA VIA ": VIA,
-	"KVMKVMKVMKVM": KVM,
-	"Microsoft Hv": MSVM,
-	"VMwareVMware": VMware,
-	"XenVMMXenVMM": XenHVM,
-	"bhyve bhyve ": Bhyve,
-	"HygonGenuine": Hygon,
-	"Vortex86 SoC": SiS,
-	"SiS SiS SiS ": SiS,
-	"RiseRiseRise": SiS,
-	"Genuine  RDC": RDC,
-}
-
-func vendorID() (Vendor, string) {
-	_, b, c, d := cpuid(0)
-	v := string(valAsString(b, d, c))
-	vend, ok := vendorMapping[v]
-	if !ok {
-		return Other, v
-	}
-	return vend, v
-}
-
-func cacheLine() int {
-	if maxFunctionID() < 0x1 {
-		return 0
-	}
-
-	_, ebx, _, _ := cpuid(1)
-	cache := (ebx & 0xff00) >> 5 // cflush size
-	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
-		_, _, ecx, _ := cpuid(0x80000006)
-		cache = ecx & 0xff // cacheline size
-	}
-	// TODO: Read from Cache and TLB Information
-	return int(cache)
-}
-
-func (c *CPUInfo) cacheSize() {
-	c.Cache.L1D = -1
-	c.Cache.L1I = -1
-	c.Cache.L2 = -1
-	c.Cache.L3 = -1
-	vendor, _ := vendorID()
-	switch vendor {
-	case Intel:
-		if maxFunctionID() < 4 {
-			return
-		}
-		for i := uint32(0); ; i++ {
-			eax, ebx, ecx, _ := cpuidex(4, i)
-			cacheType := eax & 15
-			if cacheType == 0 {
-				break
-			}
-			cacheLevel := (eax >> 5) & 7
-			coherency := int(ebx&0xfff) + 1
-			partitions := int((ebx>>12)&0x3ff) + 1
-			associativity := int((ebx>>22)&0x3ff) + 1
-			sets := int(ecx) + 1
-			size := associativity * partitions * coherency * sets
-			switch cacheLevel {
-			case 1:
-				if cacheType == 1 {
-					// 1 = Data Cache
-					c.Cache.L1D = size
-				} else if cacheType == 2 {
-					// 2 = Instruction Cache
-					c.Cache.L1I = size
-				} else {
-					if c.Cache.L1D < 0 {
-						c.Cache.L1I = size
-					}
-					if c.Cache.L1I < 0 {
-						c.Cache.L1I = size
-					}
-				}
-			case 2:
-				c.Cache.L2 = size
-			case 3:
-				c.Cache.L3 = size
-			}
-		}
-	case AMD, Hygon:
-		// Untested.
-		if maxExtendedFunction() < 0x80000005 {
-			return
-		}
-		_, _, ecx, edx := cpuid(0x80000005)
-		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
-		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
-
-		if maxExtendedFunction() < 0x80000006 {
-			return
-		}
-		_, _, ecx, _ = cpuid(0x80000006)
-		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
-
-		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
-		if maxExtendedFunction() < 0x8000001D {
-			return
-		}
-		for i := uint32(0); i < math.MaxUint32; i++ {
-			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
-
-			level := (eax >> 5) & 7
-			cacheNumSets := ecx + 1
-			cacheLineSize := 1 + (ebx & 2047)
-			cachePhysPartitions := 1 + ((ebx >> 12) & 511)
-			cacheNumWays := 1 + ((ebx >> 22) & 511)
-
-			typ := eax & 15
-			size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
-			if typ == 0 {
-				return
-			}
-
-			switch level {
-			case 1:
-				switch typ {
-				case 1:
-					// Data cache
-					c.Cache.L1D = size
-				case 2:
-					// Inst cache
-					c.Cache.L1I = size
-				default:
-					if c.Cache.L1D < 0 {
-						c.Cache.L1I = size
-					}
-					if c.Cache.L1I < 0 {
-						c.Cache.L1I = size
-					}
-				}
-			case 2:
-				c.Cache.L2 = size
-			case 3:
-				c.Cache.L3 = size
-			}
-		}
-	}
-
-	return
-}
-
-type SGXEPCSection struct {
-	BaseAddress uint64
-	EPCSize     uint64
-}
-
-type SGXSupport struct {
-	Available           bool
-	LaunchControl       bool
-	SGX1Supported       bool
-	SGX2Supported       bool
-	MaxEnclaveSizeNot64 int64
-	MaxEnclaveSize64    int64
-	EPCSections         []SGXEPCSection
-}
-
-func hasSGX(available, lc bool) (rval SGXSupport) {
-	rval.Available = available
-
-	if !available {
-		return
-	}
-
-	rval.LaunchControl = lc
-
-	a, _, _, d := cpuidex(0x12, 0)
-	rval.SGX1Supported = a&0x01 != 0
-	rval.SGX2Supported = a&0x02 != 0
-	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
-	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
-	rval.EPCSections = make([]SGXEPCSection, 0)
-
-	for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
-		eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
-		leafType := eax & 0xf
-
-		if leafType == 0 {
-			// Invalid subleaf, stop iterating
-			break
-		} else if leafType == 1 {
-			// EPC Section subleaf
-			baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
-			size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
-
-			section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
-			rval.EPCSections = append(rval.EPCSections, section)
-		}
-	}
-
-	return
-}
-
-func support() Flags {
-	mfi := maxFunctionID()
-	vend, _ := vendorID()
-	if mfi < 0x1 {
-		return 0
-	}
-	rval := uint64(0)
-	_, _, c, d := cpuid(1)
-	if (d & (1 << 15)) != 0 {
-		rval |= CMOV
-	}
-	if (d & (1 << 23)) != 0 {
-		rval |= MMX
-	}
-	if (d & (1 << 25)) != 0 {
-		rval |= MMXEXT
-	}
-	if (d & (1 << 25)) != 0 {
-		rval |= SSE
-	}
-	if (d & (1 << 26)) != 0 {
-		rval |= SSE2
-	}
-	if (c & 1) != 0 {
-		rval |= SSE3
-	}
-	if (c & (1 << 5)) != 0 {
-		rval |= VMX
-	}
-	if (c & 0x00000200) != 0 {
-		rval |= SSSE3
-	}
-	if (c & 0x00080000) != 0 {
-		rval |= SSE4
-	}
-	if (c & 0x00100000) != 0 {
-		rval |= SSE42
-	}
-	if (c & (1 << 25)) != 0 {
-		rval |= AESNI
-	}
-	if (c & (1 << 1)) != 0 {
-		rval |= CLMUL
-	}
-	if c&(1<<23) != 0 {
-		rval |= POPCNT
-	}
-	if c&(1<<30) != 0 {
-		rval |= RDRAND
-	}
-	if c&(1<<29) != 0 {
-		rval |= F16C
-	}
-	if c&(1<<13) != 0 {
-		rval |= CX16
-	}
-	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
-		if threadsPerCore() > 1 {
-			rval |= HTT
-		}
-	}
-	if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
-		if threadsPerCore() > 1 {
-			rval |= HTT
-		}
-	}
-	// Check XGETBV, OXSAVE and AVX bits
-	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
-		// Check for OS support
-		eax, _ := xgetbv(0)
-		if (eax & 0x6) == 0x6 {
-			rval |= AVX
-			if (c & 0x00001000) != 0 {
-				rval |= FMA3
-			}
-		}
-	}
-
-	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
-	if mfi >= 7 {
-		_, ebx, ecx, edx := cpuidex(7, 0)
-		eax1, _, _, _ := cpuidex(7, 1)
-		if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
-			rval |= AVX2
-		}
-		if (ebx & 0x00000008) != 0 {
-			rval |= BMI1
-			if (ebx & 0x00000100) != 0 {
-				rval |= BMI2
-			}
-		}
-		if ebx&(1<<2) != 0 {
-			rval |= SGX
-		}
-		if ebx&(1<<4) != 0 {
-			rval |= HLE
-		}
-		if ebx&(1<<9) != 0 {
-			rval |= ERMS
-		}
-		if ebx&(1<<11) != 0 {
-			rval |= RTM
-		}
-		if ebx&(1<<14) != 0 {
-			rval |= MPX
-		}
-		if ebx&(1<<18) != 0 {
-			rval |= RDSEED
-		}
-		if ebx&(1<<19) != 0 {
-			rval |= ADX
-		}
-		if ebx&(1<<29) != 0 {
-			rval |= SHA
-		}
-		if edx&(1<<26) != 0 {
-			rval |= IBPB
-		}
-		if ecx&(1<<30) != 0 {
-			rval |= SGXLC
-		}
-		if edx&(1<<27) != 0 {
-			rval |= STIBP
-		}
-
-		// Only detect AVX-512 features if XGETBV is supported
-		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
-			// Check for OS support
-			eax, _ := xgetbv(0)
-
-			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
-			// ZMM16-ZMM31 state are enabled by OS)
-			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
-			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
-				if ebx&(1<<16) != 0 {
-					rval |= AVX512F
-				}
-				if ebx&(1<<17) != 0 {
-					rval |= AVX512DQ
-				}
-				if ebx&(1<<21) != 0 {
-					rval |= AVX512IFMA
-				}
-				if ebx&(1<<26) != 0 {
-					rval |= AVX512PF
-				}
-				if ebx&(1<<27) != 0 {
-					rval |= AVX512ER
-				}
-				if ebx&(1<<28) != 0 {
-					rval |= AVX512CD
-				}
-				if ebx&(1<<30) != 0 {
-					rval |= AVX512BW
-				}
-				if ebx&(1<<31) != 0 {
-					rval |= AVX512VL
-				}
-				// ecx
-				if ecx&(1<<1) != 0 {
-					rval |= AVX512VBMI
-				}
-				if ecx&(1<<6) != 0 {
-					rval |= AVX512VBMI2
-				}
-				if ecx&(1<<8) != 0 {
-					rval |= GFNI
-				}
-				if ecx&(1<<9) != 0 {
-					rval |= VAES
-				}
-				if ecx&(1<<10) != 0 {
-					rval |= VPCLMULQDQ
-				}
-				if ecx&(1<<11) != 0 {
-					rval |= AVX512VNNI
-				}
-				if ecx&(1<<12) != 0 {
-					rval |= AVX512BITALG
-				}
-				if ecx&(1<<14) != 0 {
-					rval |= AVX512VPOPCNTDQ
-				}
-				// edx
-				if edx&(1<<8) != 0 {
-					rval |= AVX512VP2INTERSECT
-				}
-				// cpuid eax 07h,ecx=1
-				if eax1&(1<<5) != 0 {
-					rval |= AVX512BF16
-				}
-			}
-		}
-	}
-
-	if maxExtendedFunction() >= 0x80000001 {
-		_, _, c, d := cpuid(0x80000001)
-		if (c & (1 << 5)) != 0 {
-			rval |= LZCNT
-			rval |= POPCNT
-		}
-		if (d & (1 << 31)) != 0 {
-			rval |= AMD3DNOW
-		}
-		if (d & (1 << 30)) != 0 {
-			rval |= AMD3DNOWEXT
-		}
-		if (d & (1 << 23)) != 0 {
-			rval |= MMX
-		}
-		if (d & (1 << 22)) != 0 {
-			rval |= MMXEXT
-		}
-		if (c & (1 << 6)) != 0 {
-			rval |= SSE4A
-		}
-		if d&(1<<20) != 0 {
-			rval |= NX
-		}
-		if d&(1<<27) != 0 {
-			rval |= RDTSCP
-		}
-
-		/* Allow for selectively disabling SSE2 functions on AMD processors
-		   with SSE2 support but not SSE4a. This includes Athlon64, some
-		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
-		   than SSE2 often enough to utilize this special-case flag.
-		   AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
-		   so that SSE2 is used unless explicitly disabled by checking
-		   AV_CPU_FLAG_SSE2SLOW. */
-		if vend != Intel &&
-			rval&SSE2 != 0 && (c&0x00000040) == 0 {
-			rval |= SSE2SLOW
-		}
-
-		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
-		 * used unless the OS has AVX support. */
-		if (rval & AVX) != 0 {
-			if (c & 0x00000800) != 0 {
-				rval |= XOP
-			}
-			if (c & 0x00010000) != 0 {
-				rval |= FMA4
-			}
-		}
-
-		if vend == Intel {
-			family, model := familyModel()
-			if family == 6 && (model == 9 || model == 13 || model == 14) {
-				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
-				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
-				 * usually slower than mmx. */
-				if (rval & SSE2) != 0 {
-					rval |= SSE2SLOW
-				}
-				if (rval & SSE3) != 0 {
-					rval |= SSE3SLOW
-				}
-			}
-			/* The Atom processor has SSSE3 support, which is useful in many cases,
-			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
-			 * on the Atom, but is generally faster on other processors supporting
-			 * SSSE3. This flag allows for selectively disabling certain SSSE3
-			 * functions on the Atom. */
-			if family == 6 && model == 28 {
-				rval |= ATOM
-			}
-		}
-	}
-	return Flags(rval)
-}
-
-func valAsString(values ...uint32) []byte {
-	r := make([]byte, 4*len(values))
-	for i, v := range values {
-		dst := r[i*4:]
-		dst[0] = byte(v & 0xff)
-		dst[1] = byte((v >> 8) & 0xff)
-		dst[2] = byte((v >> 16) & 0xff)
-		dst[3] = byte((v >> 24) & 0xff)
-		switch {
-		case dst[0] == 0:
-			return r[:i*4]
-		case dst[1] == 0:
-			return r[:i*4+1]
-		case dst[2] == 0:
-			return r[:i*4+2]
-		case dst[3] == 0:
-			return r[:i*4+3]
-		}
-	}
-	return r
-}
-
-// Single-precision and double-precision floating point
-func (c CPUInfo) ArmFP() bool {
-	return c.Arm&FP != 0
-}
-
-// Advanced SIMD
-func (c CPUInfo) ArmASIMD() bool {
-	return c.Arm&ASIMD != 0
-}
-
-// Generic timer
-func (c CPUInfo) ArmEVTSTRM() bool {
-	return c.Arm&EVTSTRM != 0
-}
-
-// AES instructions
-func (c CPUInfo) ArmAES() bool {
-	return c.Arm&AES != 0
-}
-
-// Polynomial Multiply instructions (PMULL/PMULL2)
-func (c CPUInfo) ArmPMULL() bool {
-	return c.Arm&PMULL != 0
-}
-
-// SHA-1 instructions (SHA1C, etc)
-func (c CPUInfo) ArmSHA1() bool {
-	return c.Arm&SHA1 != 0
-}
-
-// SHA-2 instructions (SHA256H, etc)
-func (c CPUInfo) ArmSHA2() bool {
-	return c.Arm&SHA2 != 0
-}
-
-// CRC32/CRC32C instructions
-func (c CPUInfo) ArmCRC32() bool {
-	return c.Arm&CRC32 != 0
-}
-
-// Large System Extensions (LSE)
-func (c CPUInfo) ArmATOMICS() bool {
-	return c.Arm&ATOMICS != 0
-}
-
-// Half-precision floating point
-func (c CPUInfo) ArmFPHP() bool {
-	return c.Arm&FPHP != 0
-}
-
-// Advanced SIMD half-precision floating point
-func (c CPUInfo) ArmASIMDHP() bool {
-	return c.Arm&ASIMDHP != 0
-}
-
-// Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
-func (c CPUInfo) ArmASIMDRDM() bool {
-	return c.Arm&ASIMDRDM != 0
-}
-
-// Javascript-style double->int convert (FJCVTZS)
-func (c CPUInfo) ArmJSCVT() bool {
-	return c.Arm&JSCVT != 0
-}
-
-// Floatin point complex number addition and multiplication
-func (c CPUInfo) ArmFCMA() bool {
-	return c.Arm&FCMA != 0
-}
-
-// Weaker release consistency (LDAPR, etc)
-func (c CPUInfo) ArmLRCPC() bool {
-	return c.Arm&LRCPC != 0
-}
-
-// Data cache clean to Point of Persistence (DC CVAP)
-func (c CPUInfo) ArmDCPOP() bool {
-	return c.Arm&DCPOP != 0
-}
-
-// SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
-func (c CPUInfo) ArmSHA3() bool {
-	return c.Arm&SHA3 != 0
-}
-
-// SM3 instructions
-func (c CPUInfo) ArmSM3() bool {
-	return c.Arm&SM3 != 0
-}
-
-// SM4 instructions
-func (c CPUInfo) ArmSM4() bool {
-	return c.Arm&SM4 != 0
-}
-
-// SIMD Dot Product
-func (c CPUInfo) ArmASIMDDP() bool {
-	return c.Arm&ASIMDDP != 0
-}
-
-// SHA512 instructions
-func (c CPUInfo) ArmSHA512() bool {
-	return c.Arm&SHA512 != 0
-}
-
-// Scalable Vector Extension
-func (c CPUInfo) ArmSVE() bool {
-	return c.Arm&SVE != 0
-}
-
-// Generic Pointer Authentication
-func (c CPUInfo) ArmGPA() bool {
-	return c.Arm&GPA != 0
-}
diff --git a/vendor/github.com/klauspost/cpuid/.gitignore b/vendor/github.com/klauspost/cpuid/v2/.gitignore
index daf913b1b..daf913b1b 100644
--- a/vendor/github.com/klauspost/cpuid/.gitignore
+++ b/vendor/github.com/klauspost/cpuid/v2/.gitignore
diff --git a/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml b/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
new file mode 100644
index 000000000..944cc0007
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
@@ -0,0 +1,74 @@
+# This is an example goreleaser.yaml file with some sane defaults.
+# Make sure to check the documentation at http://goreleaser.com
+
+builds:
+  -
+    id: "cpuid"
+    binary: cpuid
+    main: ./cmd/cpuid/main.go
+    env:
+      - CGO_ENABLED=0
+    flags:
+      - -ldflags=-s -w
+    goos:
+      - aix
+      - linux
+      - freebsd
+      - netbsd
+      - windows
+      - darwin
+    goarch:
+      - 386
+      - amd64
+      - arm64
+    goarm:
+      - 7
+
+archives:
+  -
+    id: cpuid
+    name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
+    replacements:
+      aix: AIX
+      darwin: OSX
+      linux: Linux
+      windows: Windows
+      386: i386
+      amd64: x86_64
+      freebsd: FreeBSD
+      netbsd: NetBSD
+    format_overrides:
+      - goos: windows
+        format: zip
+    files:
+      - LICENSE
+checksum:
+  name_template: 'checksums.txt'
+snapshot:
+  name_template: "{{ .Tag }}-next"
+changelog:
+  sort: asc
+  filters:
+    exclude:
+    - '^doc:'
+    - '^docs:'
+    - '^test:'
+    - '^tests:'
+    - '^Update\sREADME.md'
+
+nfpms:
+  -
+    file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
+    vendor: Klaus Post
+    homepage: https://github.com/klauspost/cpuid
+    maintainer: Klaus Post <klauspost@gmail.com>
+    description: CPUID Tool
+    license: BSD 3-Clause
+    formats:
+      - deb
+      - rpm
+    replacements:
+      darwin: Darwin
+      linux: Linux
+      freebsd: FreeBSD
+      amd64: x86_64
diff --git a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt
index 2ef4714f7..2ef4714f7 100644
--- a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
+++ b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt
diff --git a/vendor/github.com/klauspost/cpuid/LICENSE b/vendor/github.com/klauspost/cpuid/v2/LICENSE
index 5cec7ee94..5cec7ee94 100644
--- a/vendor/github.com/klauspost/cpuid/LICENSE
+++ b/vendor/github.com/klauspost/cpuid/v2/LICENSE
diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md
new file mode 100644
index 000000000..ea7df3dd8
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/README.md
@@ -0,0 +1,258 @@
+# cpuid
+Package cpuid provides information about the CPU running the current program.
+
+CPU features are detected on startup, and kept for fast access through the life of the application.
+Currently x86 / x64 (AMD64/i386) and ARM (ARM64) are supported, and no external C (cgo) code is used, which should make the library very easy to use.
+
+You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+
+Package home: https://github.com/klauspost/cpuid
+
+[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
+[![Build Status][3]][4]
+
+[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
+[4]: https://travis-ci.org/klauspost/cpuid
+
+## installing
+
+`go get -u github.com/klauspost/cpuid/v2` using modules. 
+
+Drop `v2` for others.
+
+## example
+
+```Go
+package main
+
+import (
+	"fmt"
+	"strings"
+
+	. "github.com/klauspost/cpuid/v2"
+)
+
+func main() {
+	// Print basic CPU information:
+	fmt.Println("Name:", CPU.BrandName)
+	fmt.Println("PhysicalCores:", CPU.PhysicalCores)
+	fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
+	fmt.Println("LogicalCores:", CPU.LogicalCores)
+	fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID)
+	fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ","))
+	fmt.Println("Cacheline bytes:", CPU.CacheLine)
+	fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes")
+	fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
+	fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes")
+	fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes")
+	fmt.Println("Frequency", CPU.Hz, "hz")
+
+	// Test if we have these specific features:
+	if CPU.Supports(SSE, SSE2) {
+		fmt.Println("We have Streaming SIMD 2 Extensions")
+	}
+}
+```
+
+Sample output:
+```
+>go run main.go
+Name: AMD Ryzen 9 3950X 16-Core Processor
+PhysicalCores: 16
+ThreadsPerCore: 2
+LogicalCores: 32
+Family 23 Model: 113 Vendor ID: AMD
+Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3
+Cacheline bytes: 64
+L1 Data Cache: 32768 bytes
+L1 Instruction Cache: 32768 bytes
+L2 Cache: 524288 bytes
+L3 Cache: 16777216 bytes
+Frequency 0 hz
+We have Streaming SIMD 2 Extensions
+```
+
+# usage
+
+The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
+A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.  
+
+Note that for some cpu/os combinations some features will not be detected.
+`amd64` has rather good support and should work reliably on all platforms.
+
+Note that hypervisors may not pass through all CPU features.
+
+## arm64 feature detection
+
+Not all operating systems provide ARM features directly 
+and there is no safe way to do so for the rest.
+
+Currently `arm64/linux` and `arm64/freebsd` should be quite reliable. 
+`arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected.
+
+`DetectARM()` can be used if you are able to control your deployment.
+It will detect CPU features, but may crash if the OS doesn't intercept the calls.
+A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below.
+ 
+Note that currently only features are detected on ARM;
+no additional information is available.
+
+## flags
+
+It is possible to add flags that affect cpu detection.
+
+For this the `Flags()` command is provided.
+
+This must be called *before* `flag.Parse()`, and `Detect()` must be called *after* the flags have been parsed.
+
+This means that any detection used in `init()` functions will not contain these flags.
+
+Example:
+
+```Go
+package main
+
+import (
+	"flag"
+	"fmt"
+	"strings"
+
+	"github.com/klauspost/cpuid/v2"
+)
+
+func main() {
+	cpuid.Flags()
+	flag.Parse()
+	cpuid.Detect()
+
+	// Test if we have these specific features:
+	if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) {
+		fmt.Println("We have Streaming SIMD 2 Extensions")
+	}
+}
+```
+
+## commandline
+
+Download as binary from: https://github.com/klauspost/cpuid/releases
+
+Install from source:
+
+`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
+
+### Example
+
+```
+λ cpuid
+Name: AMD Ryzen 9 3950X 16-Core Processor
+Vendor String: AuthenticAMD
+Vendor ID: AMD
+PhysicalCores: 16
+Threads Per Core: 2
+Logical Cores: 32
+CPU Family 23 Model: 113
+Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE
+Microarchitecture level: 3
+Cacheline bytes: 64
+L1 Instruction Cache: 32768 bytes
+L1 Data Cache: 32768 bytes
+L2 Cache: 524288 bytes
+L3 Cache: 16777216 bytes
+
+```
+### JSON Output:
+
+```
+λ cpuid --json
+{
+  "BrandName": "AMD Ryzen 9 3950X 16-Core Processor",
+  "VendorID": 2,
+  "VendorString": "AuthenticAMD",
+  "PhysicalCores": 16,
+  "ThreadsPerCore": 2,
+  "LogicalCores": 32,
+  "Family": 23,
+  "Model": 113,
+  "CacheLine": 64,
+  "Hz": 0,
+  "BoostFreq": 0,
+  "Cache": {
+    "L1I": 32768,
+    "L1D": 32768,
+    "L2": 524288,
+    "L3": 16777216
+  },
+  "SGX": {
+    "Available": false,
+    "LaunchControl": false,
+    "SGX1Supported": false,
+    "SGX2Supported": false,
+    "MaxEnclaveSizeNot64": 0,
+    "MaxEnclaveSize64": 0,
+    "EPCSections": null
+  },
+  "Features": [
+    "ADX",
+    "AESNI",
+    "AVX",
+    "AVX2",
+    "BMI1",
+    "BMI2",
+    "CLMUL",
+    "CLZERO",
+    "CMOV",
+    "CMPXCHG8",
+    "CPBOOST",
+    "CX16",
+    "F16C",
+    "FMA3",
+    "FXSR",
+    "FXSROPT",
+    "HTT",
+    "HYPERVISOR",
+    "LAHF",
+    "LZCNT",
+    "MCAOVERFLOW",
+    "MMX",
+    "MMXEXT",
+    "MOVBE",
+    "NX",
+    "OSXSAVE",
+    "POPCNT",
+    "RDRAND",
+    "RDSEED",
+    "RDTSCP",
+    "SCE",
+    "SHA",
+    "SSE",
+    "SSE2",
+    "SSE3",
+    "SSE4",
+    "SSE42",
+    "SSE4A",
+    "SSSE3",
+    "SUCCOR",
+    "X87",
+    "XSAVE"
+  ],
+  "X64Level": 3
+}
+```
+
+### Check CPU microarch level
+
+```
+λ cpuid --check-level=3
+2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor
+2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3.
+Exit Code 0
+
+λ cpuid --check-level=4
+2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor
+2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3.
+Exit Code 1
+```
+
+# license
+
+This code is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
new file mode 100644
index 000000000..27f33250e
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@@ -0,0 +1,1291 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// Package cpuid provides information about the CPU running the current program.
+//
+// CPU features are detected on startup, and kept for fast access through the life of the application.
+// Currently x86 / x64 (AMD64) as well as arm64 are supported.
+//
+// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+//
+// Package home: https://github.com/klauspost/cpuid
+package cpuid
+
+import (
+	"flag"
+	"fmt"
+	"math"
+	"math/bits"
+	"os"
+	"runtime"
+	"strings"
+)
+
+// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
+// and Processor Programming Reference (PPR)
+
+// Vendor is a comparable, enumerated representation of a CPU vendor.
+type Vendor int
+
+const (
+	VendorUnknown Vendor = iota // Zero value; vendorID reports it for strings missing from vendorMapping.
+	Intel
+	AMD
+	VIA
+	Transmeta
+	NSC
+	KVM  // Kernel-based Virtual Machine
+	MSVM // Microsoft Hyper-V or Windows Virtual PC
+	VMware
+	XenHVM
+	Bhyve
+	Hygon
+	SiS
+	RDC
+
+	Ampere
+	ARM
+	Broadcom
+	Cavium
+	DEC
+	Fujitsu
+	Infineon
+	Motorola
+	NVIDIA
+	AMCC
+	Qualcomm
+	Marvell
+
+	lastVendor // Unexported end marker; always one greater than the last vendor above.
+)
+
+//go:generate stringer -type=FeatureID,Vendor
+
+// FeatureID is the ID of a specific cpu feature.
+type FeatureID int
+
+const (
+	// Keep index -1 as unknown
+	UNKNOWN = -1
+
+	// Add features. iota is already 1 on the ADX line (UNKNOWN is the first spec), so FeatureID 0 is unnamed.
+	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+	AESNI                               // Advanced Encryption Standard New Instructions
+	AMD3DNOW                            // AMD 3DNOW
+	AMD3DNOWEXT                         // AMD 3DNowExt
+	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
+	AMXINT8                             // Tile computational operations on 8-bit integers
+	AMXTILE                             // Tile architecture
+	AVX                                 // AVX functions
+	AVX2                                // AVX2 functions
+	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
+	AVX512BITALG                        // AVX-512 Bit Algorithms
+	AVX512BW                            // AVX-512 Byte and Word Instructions
+	AVX512CD                            // AVX-512 Conflict Detection Instructions
+	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
+	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
+	AVX512F                             // AVX-512 Foundation
+	AVX512FP16                          // AVX-512 FP16 Instructions
+	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
+	AVX512PF                            // AVX-512 Prefetch Instructions
+	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
+	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
+	AVX512VL                            // AVX-512 Vector Length Extensions
+	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
+	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
+	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
+	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one
+	AVXVNNI                             // AVX (VEX encoded) VNNI neural network instructions
+	BMI1                                // Bit Manipulation Instruction Set 1
+	BMI2                                // Bit Manipulation Instruction Set 2
+	CETIBT                              // Intel CET Indirect Branch Tracking
+	CETSS                               // Intel CET Shadow Stack
+	CLDEMOTE                            // Cache Line Demote
+	CLMUL                               // Carry-less Multiplication
+	CLZERO                              // CLZERO instruction supported
+	CMOV                                // i686 CMOV
+	CMPSB_SCADBS_SHORT                  // Fast short CMPSB and SCASB
+	CMPXCHG8                            // CMPXCHG8 instruction
+	CPBOOST                             // Core Performance Boost
+	CX16                                // CMPXCHG16B Instruction
+	ENQCMD                              // Enqueue Command
+	ERMS                                // Enhanced REP MOVSB/STOSB
+	F16C                                // Half-precision floating-point conversion
+	FMA3                                // Intel FMA 3. Does not imply AVX.
+	FMA4                                // Bulldozer FMA4 functions
+	FXSR                                // FXSAVE, FXRESTOR instructions, CR4 bit 9
+	FXSROPT                             // FXSAVE/FXRSTOR optimizations
+	GFNI                                // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
+	HLE                                 // Hardware Lock Elision
+	HRESET                              // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
+	HTT                                 // Hyperthreading (enabled)
+	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
+	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
+	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+	IBS                                 // Instruction Based Sampling (AMD)
+	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
+	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
+	IBSFFV                              // Instruction Based Sampling Feature (AMD)
+	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
+	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
+	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
+	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
+	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
+	IBS_PREVENTHOST                     // Disallowing IBS use by the host supported
+	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
+	INVLPGB                             // INVLPGB and TLBSYNC instruction supported
+	LAHF                                // LAHF/SAHF in long mode
+	LAM                                 // If set, CPU supports Linear Address Masking
+	LBRVIRT                             // LBR virtualization
+	LZCNT                               // LZCNT instruction
+	MCAOVERFLOW                         // MCA overflow recovery support.
+	MCOMMIT                             // MCOMMIT instruction supported
+	MMX                                 // standard MMX
+	MMXEXT                              // SSE integer functions or AMD MMX ext
+	MOVBE                               // MOVBE instruction (big-endian)
+	MOVDIR64B                           // Move 64 Bytes as Direct Store
+	MOVDIRI                             // Move Doubleword as Direct Store
+	MOVSB_ZL                            // Fast Zero-Length MOVSB
+	MPX                                 // Intel MPX (Memory Protection Extensions)
+	MSRIRC                              // Instruction Retired Counter MSR available
+	MSR_PAGEFLUSH                       // Page Flush MSR available
+	NRIPS                               // Indicates support for NRIP save on VMEXIT
+	NX                                  // NX (No-Execute) bit
+	OSXSAVE                             // XSAVE enabled by OS
+	PCONFIG                             // PCONFIG for Intel Multi-Key Total Memory Encryption
+	POPCNT                              // POPCNT instruction
+	RDPRU                               // RDPRU instruction supported
+	RDRAND                              // RDRAND instruction is available
+	RDSEED                              // RDSEED instruction is available
+	RDTSCP                              // RDTSCP Instruction
+	RTM                                 // Restricted Transactional Memory
+	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
+	SERIALIZE                           // Serialize Instruction Execution
+	SEV                                 // AMD Secure Encrypted Virtualization supported
+	SEV_64BIT                           // AMD SEV guest execution only allowed from a 64-bit host
+	SEV_ALTERNATIVE                     // AMD SEV Alternate Injection supported
+	SEV_DEBUGSWAP                       // Full debug state swap supported for SEV-ES guests
+	SEV_ES                              // AMD SEV Encrypted State supported
+	SEV_RESTRICTED                      // AMD SEV Restricted Injection supported
+	SEV_SNP                             // AMD SEV Secure Nested Paging supported
+	SGX                                 // Software Guard Extensions
+	SGXLC                               // Software Guard Extensions Launch Control
+	SHA                                 // Intel SHA Extensions
+	SME                                 // AMD Secure Memory Encryption supported
+	SME_COHERENT                        // AMD Hardware cache coherency across encryption domains enforced
+	SSE                                 // SSE functions
+	SSE2                                // P4 SSE functions
+	SSE3                                // Prescott SSE3 functions
+	SSE4                                // Penryn SSE4.1 functions
+	SSE42                               // Nehalem SSE4.2 functions
+	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
+	SSSE3                               // Conroe SSSE3 functions
+	STIBP                               // Single Thread Indirect Branch Predictors
+	STOSB_SHORT                         // Fast short STOSB
+	SUCCOR                              // Software uncorrectable error containment and recovery capability.
+	SVM                                 // AMD Secure Virtual Machine
+	SVMDA                               // Indicates support for the SVM decode assists.
+	SVMFBASID                           // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
+	SVML                                // AMD SVM lock. Indicates support for SVM-Lock.
+	SVMNP                               // AMD SVM nested paging
+	SVMPF                               // SVM pause intercept filter. Indicates support for the pause intercept filter
+	SVMPFT                              // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
+	SYSCALL                             // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
+	SYSEE                               // SYSENTER and SYSEXIT instructions
+	TBM                                 // AMD Trailing Bit Manipulation
+	TOPEXT                              // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
+	TME                                 // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
+	TSCRATEMSR                          // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
+	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
+	VAES                                // Vector AES. AVX(512) versions requires additional checks.
+	VMCBCLEAN                           // VMCB clean bits. Indicates support for VMCB clean bits.
+	VMPL                                // AMD VM Permission Levels supported
+	VMSA_REGPROT                        // AMD VMSA Register Protection supported
+	VMX                                 // Virtual Machine Extensions
+	VPCLMULQDQ                          // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
+	VTE                                 // AMD Virtual Transparent Encryption supported
+	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
+	WBNOINVD                            // Write Back and Do Not Invalidate Cache
+	X87                                 // FPU
+	XGETBV1                             // Supports XGETBV with ECX = 1
+	XOP                                 // Bulldozer XOP functions
+	XSAVE                               // XSAVE, XRESTOR, XSETBV, XGETBV
+	XSAVEC                              // Supports XSAVEC and the compacted form of XRSTOR.
+	XSAVEOPT                            // XSAVEOPT available
+	XSAVES                              // Supports XSAVES/XRSTORS and IA32_XSS
+
+	// ARM features:
+	AESARM   // AES instructions
+	ARMCPUID // Some CPU ID registers readable at user-level
+	ASIMD    // Advanced SIMD
+	ASIMDDP  // SIMD Dot Product
+	ASIMDHP  // Advanced SIMD half-precision floating point
+	ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
+	ATOMICS  // Large System Extensions (LSE)
+	CRC32    // CRC32/CRC32C instructions
+	DCPOP    // Data cache clean to Point of Persistence (DC CVAP)
+	EVTSTRM  // Generic timer
+	FCMA     // Floating point complex number addition and multiplication
+	FP       // Single-precision and double-precision floating point
+	FPHP     // Half-precision floating point
+	GPA      // Generic Pointer Authentication
+	JSCVT    // Javascript-style double->int convert (FJCVTZS)
+	LRCPC    // Weaker release consistency (LDAPR, etc)
+	PMULL    // Polynomial Multiply instructions (PMULL/PMULL2)
+	SHA1     // SHA-1 instructions (SHA1C, etc)
+	SHA2     // SHA-2 instructions (SHA256H, etc)
+	SHA3     // SHA-3 instructions (EOR3, RAX1, XAR, BCAX)
+	SHA512   // SHA512 instructions
+	SM3      // SM3 instructions
+	SM4      // SM4 instructions
+	SVE      // Scalable Vector Extension
+	// Keep it last. It automatically defines the size of the flagSet array and ends ID iteration.
+	lastID
+
+	firstID FeatureID = UNKNOWN + 1 // == 0; ParseFeature and flagSet.Strings start iterating here.
+)
+
+// CPUInfo contains information about the detected system CPU.
+type CPUInfo struct {
+	BrandName      string  // Brand name reported by the CPU
+	VendorID       Vendor  // Comparable CPU vendor ID
+	VendorString   string  // Raw vendor string.
+	featureSet     flagSet // Detected features; query with Supports, Has, AnyOf or FeatureSet.
+	PhysicalCores  int     // Number of physical processor cores in your CPU. Will be 0 if undetectable.
+	ThreadsPerCore int     // Number of threads per physical core. Will be 1 if undetectable.
+	LogicalCores   int     // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
+	Family         int     // CPU family number
+	Model          int     // CPU model number
+	Stepping       int     // CPU stepping info
+	CacheLine      int     // Cache line size in bytes. Will be 0 if undetectable.
+	Hz             int64   // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
+	BoostFreq      int64   // Max clock speed, if known, 0 otherwise
+	Cache          struct {
+		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
+		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
+		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
+		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
+	}
+	SGX       SGXSupport // Intel SGX details, see SGXSupport.
+	maxFunc   uint32     // Compared against CPUID leaf numbers (see LogicalCPU); assigned outside this excerpt.
+	maxExFunc uint32     // NOTE(review): presumably the extended-leaf counterpart of maxFunc - confirm.
+}
+
+var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)        // CPUID for leaf op; assigned outside this excerpt (nil until then).
+var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) // CPUID for leaf op, sub-leaf op2; assigned outside this excerpt.
+var xgetbv func(index uint32) (eax, edx uint32)              // NOTE(review): presumably wraps the XGETBV instruction - confirm.
+var rdtscpAsm func() (eax, ebx, ecx, edx uint32)             // Used by RTCounter (eax, edx) and Ia32TscAux (ecx); assigned outside this excerpt.
+var darwinHasAVX512 = func() bool { return false }           // Default reports false; NOTE(review): presumably replaced on darwin builds.
+
+// CPU contains information about the CPU as detected on startup,
+// or when Detect last was called.
+//
+// Use this as the primary entry point to your data.
+var CPU CPUInfo
+
+func init() {
+	initCPU() // defined elsewhere in the package (not visible in this excerpt)
+	Detect()  // fills the exported CPU variable
+}
+
+// Detect will re-detect current CPU info.
+// This will replace the content of the exported CPU variable.
+// Features named by the cpu.disable flag (see Flags) are cleared afterwards.
+//
+// Unless you expect the CPU to change while you are running your program
+// you should not need to call this function.
+// If you call this, you must ensure that no other goroutine is accessing the
+// exported CPU variable.
+func Detect() {
+	// Reset to the "undetected" defaults before probing.
+	CPU.ThreadsPerCore = 1
+	CPU.Cache.L1I, CPU.Cache.L1D = -1, -1
+	CPU.Cache.L2, CPU.Cache.L3 = -1, -1
+	// Detection stays "safe" unless the cpu.arm flag was registered and set.
+	safe := detectArmFlag == nil || !*detectArmFlag
+	addInfo(&CPU, safe)
+
+	if displayFeats != nil && *displayFeats {
+		fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
+		// Exit with non-zero so tests will print value.
+		os.Exit(1)
+	}
+	if disableFlag == nil {
+		return
+	}
+	// Clear every recognised feature named in the comma separated list.
+	for _, name := range strings.Split(*disableFlag, ",") {
+		id := ParseFeature(strings.TrimSpace(name))
+		if id == UNKNOWN {
+			continue
+		}
+		CPU.featureSet.unset(id)
+	}
+}
+
+// DetectARM will detect ARM64 features by re-running addInfo on the
+// exported CPU variable with its second argument (Detect's "safe") false.
+// This is NOT done automatically since it can potentially crash
+// if the OS does not handle the command. If in the future this can
+// be done safely this function may not do anything.
+func DetectARM() {
+	addInfo(&CPU, false)
+}
+
+var detectArmFlag *bool // cpu.arm flag value; nil until Flags is called
+var displayFeats *bool  // cpu.features flag value; nil until Flags is called
+var disableFlag *string // cpu.disable flag value; nil until Flags is called
+
+// Flags registers the cpu.disable, cpu.features and cpu.arm command line flags.
+// This must be called *before* flag.Parse AND
+// Detect must be called after the flags have been parsed.
+// Note that this means that any detection used in init() functions
+// will not contain these flags.
+func Flags() {
+	disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
+	displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
+	detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
+}
+
+// Supports returns whether the CPU supports all of the requested features.
+// Checking stops at the first missing feature; with no ids the result is true.
+func (c CPUInfo) Supports(ids ...FeatureID) bool {
+	missing := false
+	for i := 0; i < len(ids) && !missing; i++ {
+		missing = !c.featureSet.inSet(ids[i])
+	}
+	return !missing
+}
+
+// Has reports whether the single feature id is set for c.
+// Should be inlined by the compiler.
+func (c CPUInfo) Has(id FeatureID) bool {
+	return c.featureSet.inSet(id)
+}
+
+// AnyOf returns whether the CPU supports one or more of the requested features.
+// Checking stops at the first supported feature; with no ids the result is false.
+func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
+	found := false
+	for i := 0; i < len(ids) && !found; i++ {
+		found = c.featureSet.inSet(ids[i])
+	}
+	return found
+}
+
+// Cumulative feature sets checked by X64Level for levels 1-4, see https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
+var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)
+var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
+var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
+var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
+
+// X64Level returns the microarchitecture level detected on the CPU.
+// Levels are probed from 4 down to 1 and the first fully satisfied one wins.
+// If features are lacking or non x64 mode, 0 is returned.
+// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
+func (c CPUInfo) X64Level() int {
+	switch {
+	case c.featureSet.hasSet(level4Features):
+		return 4
+	case c.featureSet.hasSet(level3Features):
+		return 3
+	case c.featureSet.hasSet(level2Features):
+		return 2
+	case c.featureSet.hasSet(level1Features):
+		return 1
+	default:
+		return 0
+	}
+}
+
+// Disable clears the given features in c, one by one. It always returns true.
+func (c *CPUInfo) Disable(ids ...FeatureID) bool {
+	for i := range ids {
+		c.featureSet.unset(ids[i])
+	}
+	return true
+}
+
+// Enable will enable one or several features even if they were undetected. It always returns true.
+// This is of course not recommended for obvious reasons.
+func (c *CPUInfo) Enable(ids ...FeatureID) bool {
+	for i := range ids {
+		c.featureSet.set(ids[i])
+	}
+	return true
+}
+
+// IsVendor returns true if the detected vendor (c.VendorID) is equal to v.
+func (c CPUInfo) IsVendor(v Vendor) bool {
+	return c.VendorID == v
+}
+
+// FeatureSet returns the names of all enabled features in a freshly allocated slice.
+func (c CPUInfo) FeatureSet() []string {
+	names := c.featureSet.Strings()
+	out := make([]string, 0, c.featureSet.nEnabled())
+	return append(out, names...)
+}
+
+// RTCounter returns the 64-bit time-stamp counter read with the RDTSCP
+// instruction: the edx result forms the high 32 bits, eax the low 32 bits.
+// The value 0 is returned if the CPU does not support the instruction.
+func (c CPUInfo) RTCounter() uint64 {
+	if c.Has(RDTSCP) {
+		lo, _, _, hi := rdtscpAsm()
+		return uint64(hi)<<32 | uint64(lo)
+	}
+	return 0
+}
+
+// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP (its ecx result).
+// This variable is OS dependent, but on Linux contains information
+// about the current cpu/core the code is running on.
+// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
+func (c CPUInfo) Ia32TscAux() uint32 {
+	if c.Has(RDTSCP) {
+		_, _, aux, _ := rdtscpAsm()
+		return aux
+	}
+	return 0
+}
+
+// LogicalCPU will return the Logical CPU the code is currently executing on,
+// taken from the top 8 bits of EBX of CPUID leaf 1. This is likely to change
+// when the OS re-schedules the running thread to another CPU.
+// If the current core cannot be detected (c.maxFunc < 1), -1 will be returned.
+func (c CPUInfo) LogicalCPU() int {
+	if c.maxFunc >= 1 {
+		_, b, _, _ := cpuid(1)
+		return int(b >> 24)
+	}
+	return -1
+}
+
+// frequencies tries to compute the clock speed of the CPU. CPUID leaves 0x15
+// and 0x16 are used when supported, otherwise the brand string is parsed.
+func (c *CPUInfo) frequencies() {
+	c.Hz, c.BoostFreq = 0, 0
+	mfi := maxFunctionID()
+	if mfi >= 0x15 {
+		eax, ebx, ecx, _ := cpuid(0x15)
+		if eax != 0 && ebx != 0 && ecx != 0 { // all three must be reported to form ecx*ebx/eax
+			c.Hz = (int64(ecx) * int64(ebx)) / int64(eax)
+		}
+	}
+	if mfi >= 0x16 {
+		a, b, _, _ := cpuid(0x16)
+		// Base: low 16 bits of EAX scaled by 1e6; replaces any leaf 0x15 result.
+		if a&0xffff > 0 {
+			c.Hz = int64(a&0xffff) * 1_000_000
+		}
+		// Boost: low 16 bits of EBX scaled by 1e6.
+		if b&0xffff > 0 {
+			c.BoostFreq = int64(b&0xffff) * 1_000_000
+		}
+	}
+	if c.Hz > 0 {
+		return
+	}
+
+	// Fall back to the official rated speed of the CPU taken from its brand
+	// string. This insanity is *actually the official documented way to do
+	// this according to Intel*, prior to leaf 0x15 existing. The official
+	// documentation only shows this working for exactly `x.xx` or `xxxx`
+	// cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
+	// sizes.
+	model := c.BrandName
+	hz := strings.LastIndex(model, "Hz")
+	if hz < 3 {
+		return
+	}
+	var multiplier int64
+	switch model[hz-1] { // unit prefix directly in front of "Hz"
+	case 'M':
+		multiplier = 1000 * 1000
+	case 'G':
+		multiplier = 1000 * 1000 * 1000
+	case 'T':
+		multiplier = 1000 * 1000 * 1000 * 1000
+	}
+	if multiplier == 0 { // unrecognised prefix
+		return
+	}
+	freq := int64(0)
+	divisor := int64(0)
+	decimalShift := int64(1)
+	var i int
+	for i = hz - 2; i >= 0 && model[i] != ' '; i-- { // walk right-to-left over the number
+		if model[i] >= '0' && model[i] <= '9' {
+			freq += int64(model[i]-'0') * decimalShift
+			decimalShift *= 10
+		} else if model[i] == '.' {
+			if divisor != 0 { // a second decimal point: give up
+				return
+			}
+			divisor = decimalShift
+		} else {
+			return
+		}
+	}
+	// we didn't find a space in front of the number
+	if i < 0 {
+		return
+	}
+	if divisor != 0 {
+		c.Hz = (freq * multiplier) / divisor
+		return
+	}
+	c.Hz = freq * multiplier
+}
+
+// VM will return true if the cpu id indicates we are in a virtual machine,
+// i.e. the HYPERVISOR feature is set on the receiver c (not on the global CPU).
+func (c CPUInfo) VM() bool {
+	return c.featureSet.inSet(HYPERVISOR)
+}
+
+// flags is one 64-bit word of feature bits; bit n of word w stands for FeatureID w*64+n.
+type flags uint64
+
+// flagBitsLog2 is log2(bits_in_uint64); flagBits and flagMask are derived from it.
+const flagBitsLog2 = 6
+const flagBits = 1 << flagBitsLog2
+const flagMask = flagBits - 1
+
+// flagSet contains detected cpu features and characteristics in an array of flags, sized from lastID.
+type flagSet [(lastID + flagMask) / flagBits]flags
+
+func (s flagSet) inSet(feat FeatureID) bool { // reports whether the bit for feat is set
+	return (s[feat>>flagBitsLog2]>>(feat&flagMask))&1 != 0
+}
+
+func (s *flagSet) set(feat FeatureID) { // turns the bit for feat on
+	s[feat>>flagBitsLog2] |= flags(1) << (feat & flagMask)
+}
+
+// setIf will set every listed feature if cond is true.
+// When cond is false the set is left untouched.
+// Features are applied in argument order via set.
+// An empty feature list is a no-op.
+func (s *flagSet) setIf(cond bool, features ...FeatureID) {
+	for i := 0; cond && i < len(features); i++ {
+		s.set(features[i])
+	}
+}
+
+// unset clears the bit for offset; all other features are left untouched.
+func (s *flagSet) unset(offset FeatureID) {
+	s[offset>>flagBitsLog2] &^= 1 << (offset & flagMask)
+}
+
+// or merges every feature set in other into s (word-wise bitwise OR).
+func (s *flagSet) or(other flagSet) {
+	for i := range s {
+		s[i] |= other[i]
+	}
+}
+
+// hasSet returns whether every feature set in other is also set in s.
+func (s flagSet) hasSet(other flagSet) bool {
+	for i := range other {
+		if missing := other[i] &^ s[i]; missing != 0 {
+			return false
+		}
+	}
+	return true
+}
+
+// nEnabled will return the number of set bits across all words of s.
+func (s flagSet) nEnabled() (n int) {
+	for i := range s {
+		n += bits.OnesCount64(uint64(s[i]))
+	}
+	return n
+}
+
+func flagSetWith(feat ...FeatureID) flagSet {
+	var res flagSet
+	for _, f := range feat {
+		res.set(f)
+	}
+	return res
+}
+
+// ParseFeature upper-cases s and returns the ID whose String() equals the result.
+// Will return UNKNOWN if not found.
+func ParseFeature(s string) FeatureID {
+	want := strings.ToUpper(s)
+	for id := firstID; id < lastID; id++ {
+		if id.String() == want {
+			return id
+		}
+	}
+	return UNKNOWN
+}
+
+// Strings returns the names of the features set in s, in ascending ID order.
+// The result is never nil; it is empty when no feature in [firstID, lastID) is set.
+// Capacity is taken from nEnabled so appending does not reallocate.
+func (s flagSet) Strings() []string {
+	// flagSet is a fixed-size array, so the former len(s) == 0 branch could never run.
+	r := make([]string, 0, s.nEnabled())
+	for i := firstID; i < lastID; i++ {
+		if s.inSet(i) {
+			r = append(r, i.String())
+		}
+	}
+	return r
+}
+
+func maxExtendedFunction() (highest uint32) { // EAX returned by CPUID leaf 0x80000000
+	highest, _, _, _ = cpuid(0x80000000)
+	return highest
+}
+
+func maxFunctionID() (highest uint32) { // EAX returned by CPUID leaf 0
+	highest, _, _, _ = cpuid(0)
+	return highest
+}
+
+func brandName() string { // brand string from leaves 0x80000002-0x80000004, else "unknown"
+	if maxExtendedFunction() < 0x80000004 {
+		return "unknown"
+	}
+	regs := make([]uint32, 0, 48)
+	for leaf := uint32(0x80000002); leaf <= 0x80000004; leaf++ {
+		a, b, c, d := cpuid(leaf)
+		regs = append(regs, a, b, c, d)
+	}
+	return strings.Trim(string(valAsString(regs...)), " ")
+}
+
+func threadsPerCore() int { // threads per physical core; the fallback paths return 1
+	mfi := maxFunctionID()
+	vend, _ := vendorID()
+
+	if mfi < 0x4 || (vend != Intel && vend != AMD) {
+		return 1
+	}
+
+	if mfi < 0xb {
+		if vend != Intel {
+			return 1
+		}
+		_, b, _, d := cpuid(1)
+		if (d & (1 << 28)) != 0 {
+			// v will contain logical core count (bits 23:16 of EBX)
+			v := (b >> 16) & 255
+			if v > 1 {
+				a4, _, _, _ := cpuid(4)
+				// physical cores (bits 31:26 of EAX, plus one)
+				v2 := (a4 >> 26) + 1
+				if v2 > 0 {
+					return int(v) / int(v2) // NOTE(review): integer division, yields 0 when v < v2
+				}
+			}
+		}
+		return 1
+	}
+	_, b, _, _ := cpuidex(0xb, 0)
+	if b&0xffff == 0 {
+		if vend == AMD {
+			// Workaround for AMD returning 0: assume 2 when bit 28 of CPUID.1 EDX
+			// is set and family >= 23. It will be more correct than not.
+			fam, _, _ := familyModel()
+			_, _, _, d := cpuid(1)
+			if (d&(1<<28)) != 0 && fam >= 23 {
+				return 2
+			}
+		}
+		return 1
+	}
+	return int(b & 0xffff)
+}
+
+func logicalCores() int { // logical processor count for Intel, AMD and Hygon; 0 otherwise
+	mfi := maxFunctionID()
+	vend, _ := vendorID()
+	if vend == AMD || vend == Hygon {
+		// Bits 23:16 of EBX from CPUID leaf 1.
+		_, ebx, _, _ := cpuid(1)
+		return int((ebx >> 16) & 0xff)
+	}
+	if vend != Intel {
+		return 0
+	}
+	if mfi >= 0xb {
+		// Leaf 0xb, sub-leaf 1: low 16 bits of EBX.
+		_, ebx, _, _ := cpuidex(0xb, 1)
+		return int(ebx & 0xffff)
+	}
+	if mfi < 1 {
+		return 0
+	}
+	// Use this on old Intel processors:
+	// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
+	// that can be assigned to logical processors in a physical package.
+	// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
+	_, ebx, _, _ := cpuid(1)
+	return int((ebx >> 16) & 0xff)
+}
+
+// familyModel decodes family, model and stepping from EAX of CPUID leaf 1.
+// All three are 0 when leaf 1 is not available.
+func familyModel() (family, model, stepping int) {
+	if maxFunctionID() < 0x1 {
+		return 0, 0, 0
+	}
+	eax, _, _, _ := cpuid(1)
+	baseFamily := int((eax >> 8) & 0xf)
+	family = baseFamily
+	model = int((eax >> 4) & 0xf)
+	stepping = int(eax & 0xf)
+	switch baseFamily {
+	case 0xf:
+		// Only for BaseFamily Fh: add ExtendedFamily[7:0] and ExtendedModel[3:0].
+		family += int((eax >> 20) & 0xff)
+		model += int((eax >> 12) & 0xf0)
+	case 0x6:
+		// Intel is 0x6, needs extended model.
+		model += int((eax >> 12) & 0xf0)
+	}
+	return family, model, stepping
+}
+
+// physicalCores returns logicalCores()/threadsPerCore() when both are positive; 0 if undetectable or unknown vendor.
+func physicalCores() int {
+	v, _ := vendorID()
+	switch v {
+	case Intel, AMD, Hygon:
+		lc, tpc := logicalCores(), threadsPerCore()
+		if lc > 0 && tpc > 0 {
+			return lc / tpc
+		}
+		if v == Intel { // no fallback for Intel; the guard above also avoids dividing by a zero tpc
+			return 0
+		}
+		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
+		if maxExtendedFunction() >= 0x80000008 {
+			_, _, c, _ := cpuid(0x80000008)
+			if c&0xff > 0 {
+				return int(c&0xff) + 1
+			}
+		}
+	}
+	return 0
+}
+
+// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
+var vendorMapping = map[string]Vendor{
+	"AMDisbetter!": AMD,
+	"AuthenticAMD": AMD,
+	"CentaurHauls": VIA,
+	"GenuineIntel": Intel,
+	"TransmetaCPU": Transmeta,
+	"GenuineTMx86": Transmeta,
+	"Geode by NSC": NSC,
+	"VIA VIA VIA ": VIA,
+	"KVMKVMKVMKVM": KVM,
+	"Microsoft Hv": MSVM,
+	"VMwareVMware": VMware,
+	"XenVMMXenVMM": XenHVM,
+	"bhyve bhyve ": Bhyve,
+	"HygonGenuine": Hygon,
+	"Vortex86 SoC": SiS,
+	"SiS SiS SiS ": SiS,
+	"RiseRiseRise": SiS,
+	"Genuine  RDC": RDC,
+}
+
+// vendorID builds the vendor string from CPUID leaf 0 (EBX, EDX, ECX order) and looks it up in vendorMapping.
+func vendorID() (Vendor, string) {
+	_, ebx, ecx, edx := cpuid(0)
+	raw := string(valAsString(ebx, edx, ecx))
+	if known, found := vendorMapping[raw]; found {
+		return known, raw
+	}
+	return VendorUnknown, raw
+}
+
+// cacheLine returns the cache line size in bytes, or 0 when it cannot be read.
+func cacheLine() int {
+	if maxFunctionID() < 0x1 {
+		return 0
+	}
+	_, ebx, _, _ := cpuid(1)
+	size := ((ebx >> 8) & 0xff) * 8 // clflush size: bits 15:8 of EBX, times 8
+	if size == 0 && maxExtendedFunction() >= 0x80000006 {
+		_, _, ecx, _ := cpuid(0x80000006)
+		size = ecx & 0xff // cacheline size
+	}
+	// TODO: Read from Cache and TLB Information
+	return int(size)
+}
+
+// cacheSize fills c.Cache with the L1I, L1D, L2 and L3 sizes in bytes. Intel uses CPUID leaf 4;
+// AMD and Hygon use leaves 0x80000005/0x80000006 and, with TOPEXT, 0x8000001D. Values start
+// at -1 (undetected); on Intel with leaf 4 available they are reset to 0 before being filled.
+func (c *CPUInfo) cacheSize() {
+	c.Cache.L1D, c.Cache.L1I, c.Cache.L2, c.Cache.L3 = -1, -1, -1, -1
+	vendor, _ := vendorID()
+	switch vendor {
+	case Intel:
+		if maxFunctionID() < 4 {
+			return
+		}
+		c.Cache.L1I, c.Cache.L1D, c.Cache.L2, c.Cache.L3 = 0, 0, 0, 0
+		for i := uint32(0); ; i++ {
+			eax, ebx, ecx, _ := cpuidex(4, i)
+			cacheType := eax & 15
+			if cacheType == 0 {
+				break
+			}
+			cacheLevel := (eax >> 5) & 7
+			coherency := int(ebx&0xfff) + 1
+			partitions := int((ebx>>12)&0x3ff) + 1
+			associativity := int((ebx>>22)&0x3ff) + 1
+			sets := int(ecx) + 1
+			size := associativity * partitions * coherency * sets
+			switch cacheLevel {
+			case 1:
+				if cacheType == 1 {
+					// 1 = Data Cache
+					c.Cache.L1D = size
+				} else if cacheType == 2 {
+					// 2 = Instruction Cache
+					c.Cache.L1I = size
+				} else { // other (unified) type: fill whichever L1 slot is still negative
+					if c.Cache.L1D < 0 {
+						c.Cache.L1D = size
+					}
+					if c.Cache.L1I < 0 {
+						c.Cache.L1I = size
+					}
+				}
+			case 2:
+				c.Cache.L2 = size
+			case 3:
+				c.Cache.L3 = size
+			}
+		}
+	case AMD, Hygon:
+		// Untested.
+		if maxExtendedFunction() < 0x80000005 {
+			return
+		}
+		_, _, ecx, edx := cpuid(0x80000005)
+		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
+		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
+
+		if maxExtendedFunction() < 0x80000006 {
+			return
+		}
+		_, _, ecx, _ = cpuid(0x80000006)
+		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
+
+		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
+		if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
+			return
+		}
+
+		// Xen Hypervisor is buggy and returns the same entry no matter ECX value.
+		// Hack: When we encounter the same entry 100 times we break.
+		nSame := 0
+		var last uint32
+		for i := uint32(0); i < math.MaxUint32; i++ {
+			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
+
+			level := (eax >> 5) & 7
+			cacheNumSets := ecx + 1
+			cacheLineSize := 1 + (ebx & 2047)
+			cachePhysPartitions := 1 + ((ebx >> 12) & 511)
+			cacheNumWays := 1 + ((ebx >> 22) & 511)
+
+			typ := eax & 15
+			size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
+			if typ == 0 {
+				return
+			}
+
+			// Check for the same value repeated.
+			comb := eax ^ ebx ^ ecx
+			if comb == last {
+				nSame++
+				if nSame == 100 {
+					return
+				}
+			}
+			last = comb
+
+			switch level {
+			case 1:
+				switch typ {
+				case 1:
+					// Data cache
+					c.Cache.L1D = size
+				case 2:
+					// Inst cache
+					c.Cache.L1I = size
+				default: // other (unified) type: fill whichever L1 slot is still negative
+					if c.Cache.L1D < 0 {
+						c.Cache.L1D = size
+					}
+					if c.Cache.L1I < 0 {
+						c.Cache.L1I = size
+					}
+				}
+			case 2:
+				c.Cache.L2 = size
+			case 3:
+				c.Cache.L3 = size
+			}
+		}
+	}
+}
+
+type SGXEPCSection struct { // one EPC section entry collected by hasSGX from CPUID leaf 0x12
+	BaseAddress uint64 // sub-leaf EAX[31:12] plus EBX[19:0] shifted to bit 32
+	EPCSize     uint64 // sub-leaf ECX[31:12] plus EDX[19:0] shifted to bit 32
+}
+
+type SGXSupport struct { // result of hasSGX; only Available is filled in when it is false
+	Available           bool            // the 'available' argument given to hasSGX
+	LaunchControl       bool            // the 'lc' argument given to hasSGX
+	SGX1Supported       bool            // CPUID(0x12, 0) EAX bit 0
+	SGX2Supported       bool            // CPUID(0x12, 0) EAX bit 1
+	MaxEnclaveSizeNot64 int64           // 1 << CPUID(0x12, 0) EDX[7:0]
+	MaxEnclaveSize64    int64           // 1 << CPUID(0x12, 0) EDX[15:8]
+	EPCSections         []SGXEPCSection // type-1 entries found in CPUID(0x12) sub-leaves 2..9
+}
+
+// hasSGX builds the SGX report. When available is false only Available is set;
+// otherwise lc is recorded and CPUID leaf 0x12 is queried for the details.
+func hasSGX(available, lc bool) (rval SGXSupport) {
+	rval.Available = available
+	if !available {
+		return rval
+	}
+	rval.LaunchControl = lc
+	// Sub-leaf 0: EAX bits 0 and 1 flag SGX1/SGX2, EDX carries two size exponents.
+	caps, _, _, sizes := cpuidex(0x12, 0)
+	rval.SGX1Supported = caps&0x01 != 0
+	rval.SGX2Supported = caps&0x02 != 0
+	rval.MaxEnclaveSizeNot64 = int64(1) << (sizes & 0xFF)     // pow 2
+	rval.MaxEnclaveSize64 = int64(1) << ((sizes >> 8) & 0xFF) // pow 2
+	rval.EPCSections = make([]SGXEPCSection, 0)
+
+	// Sub-leaves 2 through 9 are probed; the low nibble of EAX is the entry type.
+	for n := uint32(2); n < 2+8; n++ {
+		eax, ebx, ecx, edx := cpuidex(0x12, n)
+		kind := eax & 0xf
+		if kind == 0 {
+			// Invalid subleaf, stop iterating
+			break
+		}
+		if kind != 1 {
+			continue
+		}
+		// EPC Section subleaf: low halves keep bits 31:12, high halves add 20 bits at bit 32.
+		rval.EPCSections = append(rval.EPCSections, SGXEPCSection{
+			BaseAddress: uint64(eax&0xfffff000) + uint64(ebx&0x000fffff)<<32,
+			EPCSize:     uint64(ecx&0xfffff000) + uint64(edx&0x000fffff)<<32,
+		})
+	}
+	return rval
+}
+
+// support returns the set of x86 features found through CPUID (plus XGETBV for
+// OS-enabled register state). The set is empty when CPUID leaf 1 is unavailable.
+func support() flagSet {
+	var fs flagSet
+	mfi := maxFunctionID()
+	vend, _ := vendorID()
+	if mfi < 0x1 {
+		return fs
+	}
+	family, model, _ := familyModel()
+
+	_, _, c, d := cpuid(1)
+	fs.setIf((d&(1<<0)) != 0, X87)
+	fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
+	fs.setIf((d&(1<<11)) != 0, SYSEE)
+	fs.setIf((d&(1<<15)) != 0, CMOV)
+	fs.setIf((d&(1<<23)) != 0, MMX)
+	fs.setIf((d&(1<<24)) != 0, FXSR)
+	fs.setIf((d&(1<<25)) != 0, FXSROPT) // NOTE(review): same bit as SSE on the next line; 0x80000001 EDX[25] below also sets FXSROPT - confirm
+	fs.setIf((d&(1<<25)) != 0, SSE)
+	fs.setIf((d&(1<<26)) != 0, SSE2)
+	fs.setIf((c&1) != 0, SSE3)
+	fs.setIf((c&(1<<5)) != 0, VMX)
+	fs.setIf((c&(1<<9)) != 0, SSSE3)
+	fs.setIf((c&(1<<19)) != 0, SSE4)
+	fs.setIf((c&(1<<20)) != 0, SSE42)
+	fs.setIf((c&(1<<25)) != 0, AESNI)
+	fs.setIf((c&(1<<1)) != 0, CLMUL)
+	fs.setIf(c&(1<<22) != 0, MOVBE)
+	fs.setIf(c&(1<<23) != 0, POPCNT)
+	fs.setIf(c&(1<<30) != 0, RDRAND)
+
+	// This bit has been reserved by Intel & AMD for use by hypervisors,
+	// and indicates the presence of a hypervisor.
+	fs.setIf(c&(1<<31) != 0, HYPERVISOR)
+	fs.setIf(c&(1<<29) != 0, F16C)
+	fs.setIf(c&(1<<13) != 0, CX16)
+
+	if (vend == Intel || vend == AMD) && (d&(1<<28)) != 0 && mfi >= 4 {
+		fs.setIf(threadsPerCore() > 1, HTT)
+	}
+	// Keep the parentheses: & and << have equal precedence in Go, so c&1<<26 would parse as (c&1)<<26.
+	fs.setIf(c&(1<<26) != 0, XSAVE)
+	fs.setIf(c&(1<<27) != 0, OSXSAVE)
+	// Check XGETBV/XSAVE (26), OSXSAVE (27) and AVX (28) bits
+	const avxCheck = 1<<26 | 1<<27 | 1<<28
+	if c&avxCheck == avxCheck {
+		// Check for OS support
+		eax, _ := xgetbv(0)
+		if (eax & 0x6) == 0x6 {
+			fs.set(AVX)
+			switch vend {
+			case Intel:
+				// Older than Haswell.
+				fs.setIf(family == 6 && model < 60, AVXSLOW)
+			case AMD:
+				// Older than Zen 2
+				fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
+			}
+		}
+	}
+	// FMA3 can be used with SSE registers, so no OS support is strictly needed.
+	// fma3 and OSXSAVE needed.
+	const fma3Check = 1<<12 | 1<<27
+	fs.setIf(c&fma3Check == fma3Check, FMA3)
+
+	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
+	if mfi >= 7 {
+		_, ebx, ecx, edx := cpuidex(7, 0)
+		if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
+			fs.set(AVX2)
+		}
+		// CPUID.(EAX=7, ECX=0).EBX
+		if (ebx & 0x00000008) != 0 {
+			fs.set(BMI1)
+			fs.setIf((ebx&0x00000100) != 0, BMI2)
+		}
+		fs.setIf(ebx&(1<<2) != 0, SGX)
+		fs.setIf(ebx&(1<<4) != 0, HLE)
+		fs.setIf(ebx&(1<<9) != 0, ERMS)
+		fs.setIf(ebx&(1<<11) != 0, RTM)
+		fs.setIf(ebx&(1<<14) != 0, MPX)
+		fs.setIf(ebx&(1<<18) != 0, RDSEED)
+		fs.setIf(ebx&(1<<19) != 0, ADX)
+		fs.setIf(ebx&(1<<29) != 0, SHA)
+
+		// CPUID.(EAX=7, ECX=0).ECX
+		fs.setIf(ecx&(1<<5) != 0, WAITPKG)
+		fs.setIf(ecx&(1<<7) != 0, CETSS)
+		fs.setIf(ecx&(1<<8) != 0, GFNI)
+		fs.setIf(ecx&(1<<9) != 0, VAES)
+		fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
+		fs.setIf(ecx&(1<<13) != 0, TME)
+		fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
+		fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
+		fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
+		fs.setIf(ecx&(1<<29) != 0, ENQCMD)
+		fs.setIf(ecx&(1<<30) != 0, SGXLC)
+
+		// CPUID.(EAX=7, ECX=0).EDX
+		fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
+		fs.setIf(edx&(1<<14) != 0, SERIALIZE)
+		fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
+		fs.setIf(edx&(1<<18) != 0, PCONFIG)
+		fs.setIf(edx&(1<<20) != 0, CETIBT)
+		fs.setIf(edx&(1<<26) != 0, IBPB)
+		fs.setIf(edx&(1<<27) != 0, STIBP)
+
+		// CPUID.(EAX=7, ECX=1)
+		eax1, _, _, _ := cpuidex(7, 1)
+		fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
+		fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
+		fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
+		fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
+		fs.setIf(eax1&(1<<22) != 0, HRESET)
+		fs.setIf(eax1&(1<<26) != 0, LAM)
+
+		// Only detect AVX-512 features if XGETBV is supported
+		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
+			// Check for OS support
+			eax, _ := xgetbv(0)
+
+			// Verify that XCR0[7:5] = '111b' (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+			// ZMM16-ZMM31 state are enabled by OS)
+			// and that XCR0[2:1] = '11b' (XMM state and YMM state are enabled by OS).
+			hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
+			if runtime.GOOS == "darwin" {
+				hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
+			}
+			if hasAVX512 {
+				fs.setIf(ebx&(1<<16) != 0, AVX512F)
+				fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
+				fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
+				fs.setIf(ebx&(1<<26) != 0, AVX512PF)
+				fs.setIf(ebx&(1<<27) != 0, AVX512ER)
+				fs.setIf(ebx&(1<<28) != 0, AVX512CD)
+				fs.setIf(ebx&(1<<30) != 0, AVX512BW)
+				fs.setIf(ebx&(1<<31) != 0, AVX512VL)
+				// ecx
+				fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
+				fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
+				fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
+				fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
+				fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
+				// edx
+				fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
+				fs.setIf(edx&(1<<22) != 0, AMXBF16)
+				fs.setIf(edx&(1<<23) != 0, AVX512FP16)
+				fs.setIf(edx&(1<<24) != 0, AMXTILE)
+				fs.setIf(edx&(1<<25) != 0, AMXINT8)
+				// eax1 = CPUID.(EAX=7, ECX=1).EAX
+				fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
+			}
+		}
+	}
+	// Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
+	// EAX
+	// Bit 00: XSAVEOPT is available.
+	// Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
+	// Bit 02: Supports XGETBV with ECX = 1 if set.
+	// Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
+	// Bits 31 - 04: Reserved.
+	// EBX
+	// Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCR0 | IA32_XSS.
+	// ECX
+	// Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
+	// EDX?
+	// Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
+	if mfi >= 0xd {
+		if fs.inSet(XSAVE) {
+			eax, _, _, _ := cpuidex(0xd, 1)
+			fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
+			fs.setIf(eax&(1<<1) != 0, XSAVEC)
+			fs.setIf(eax&(1<<2) != 0, XGETBV1)
+			fs.setIf(eax&(1<<3) != 0, XSAVES)
+		}
+	}
+	if maxExtendedFunction() >= 0x80000001 {
+		_, _, c, d := cpuid(0x80000001)
+		if (c & (1 << 5)) != 0 {
+			fs.set(LZCNT)
+			fs.set(POPCNT)
+		}
+		// ECX
+		fs.setIf((c&(1<<0)) != 0, LAHF)
+		fs.setIf((c&(1<<2)) != 0, SVM)
+		fs.setIf((c&(1<<6)) != 0, SSE4A)
+		fs.setIf((c&(1<<10)) != 0, IBS)
+		fs.setIf((c&(1<<22)) != 0, TOPEXT)
+
+		// EDX
+		fs.setIf(d&(1<<11) != 0, SYSCALL)
+		fs.setIf(d&(1<<20) != 0, NX)
+		fs.setIf(d&(1<<22) != 0, MMXEXT)
+		fs.setIf(d&(1<<23) != 0, MMX)
+		fs.setIf(d&(1<<24) != 0, FXSR)
+		fs.setIf(d&(1<<25) != 0, FXSROPT)
+		fs.setIf(d&(1<<27) != 0, RDTSCP)
+		fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
+		fs.setIf(d&(1<<31) != 0, AMD3DNOW)
+
+		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
+		 * used unless the OS has AVX support. */
+		if fs.inSet(AVX) {
+			fs.setIf((c&(1<<11)) != 0, XOP)
+			fs.setIf((c&(1<<16)) != 0, FMA4)
+		}
+
+	}
+	if maxExtendedFunction() >= 0x80000007 {
+		_, b, _, d := cpuid(0x80000007)
+		fs.setIf((b&(1<<0)) != 0, MCAOVERFLOW)
+		fs.setIf((b&(1<<1)) != 0, SUCCOR)
+		fs.setIf((b&(1<<2)) != 0, HWA)
+		fs.setIf((d&(1<<9)) != 0, CPBOOST)
+	}
+
+	if maxExtendedFunction() >= 0x80000008 {
+		_, b, _, _ := cpuid(0x80000008)
+		fs.setIf((b&(1<<9)) != 0, WBNOINVD)
+		fs.setIf((b&(1<<8)) != 0, MCOMMIT)
+		fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
+		fs.setIf((b&(1<<4)) != 0, RDPRU)
+		fs.setIf((b&(1<<3)) != 0, INVLPGB)
+		fs.setIf((b&(1<<1)) != 0, MSRIRC)
+		fs.setIf((b&(1<<0)) != 0, CLZERO)
+	}
+
+	if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
+		_, _, _, edx := cpuid(0x8000000A)
+		fs.setIf((edx>>0)&1 == 1, SVMNP)
+		fs.setIf((edx>>1)&1 == 1, LBRVIRT)
+		fs.setIf((edx>>2)&1 == 1, SVML)
+		fs.setIf((edx>>3)&1 == 1, NRIPS)
+		fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
+		fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
+		fs.setIf((edx>>6)&1 == 1, SVMFBASID)
+		fs.setIf((edx>>7)&1 == 1, SVMDA)
+		fs.setIf((edx>>10)&1 == 1, SVMPF)
+		fs.setIf((edx>>12)&1 == 1, SVMPFT)
+	}
+
+	if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
+		eax, _, _, _ := cpuid(0x8000001b)
+		fs.setIf((eax>>0)&1 == 1, IBSFFV)
+		fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
+		fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
+		fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
+		fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
+		fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
+		fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
+		fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
+	}
+
+	if maxExtendedFunction() >= 0x8000001f && vend == AMD {
+		a, _, _, _ := cpuid(0x8000001f)
+		fs.setIf((a>>0)&1 == 1, SME)
+		fs.setIf((a>>1)&1 == 1, SEV)
+		fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
+		fs.setIf((a>>3)&1 == 1, SEV_ES)
+		fs.setIf((a>>4)&1 == 1, SEV_SNP)
+		fs.setIf((a>>5)&1 == 1, VMPL)
+		fs.setIf((a>>10)&1 == 1, SME_COHERENT)
+		fs.setIf((a>>11)&1 == 1, SEV_64BIT)
+		fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
+		fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
+		fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
+		fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
+		fs.setIf((a>>16)&1 == 1, VTE)
+		fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
+	}
+
+	return fs
+}
+
+// valAsString unpacks every value into four bytes, least significant byte
+// first, and returns the bytes that come before the first zero byte. When no
+// byte is zero the full 4*len(values) bytes are returned.
+func valAsString(values ...uint32) []byte {
+	out := make([]byte, 4*len(values))
+	for word, v := range values {
+		base := word * 4
+		// Store the whole word before looking for a terminator.
+		for k := 0; k < 4; k++ {
+			out[base+k] = byte(v >> uint(8*k))
+		}
+		// The first zero byte, scanning in storage order, ends the result.
+		for k := 0; k < 4; k++ {
+			if out[base+k] == 0 {
+				return out[:base+k]
+			}
+		}
+	}
+	// No terminator found in any word.
+	return out
+}
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_386.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
index 089638f51..8587c3a1f 100644
--- a/vendor/github.com/klauspost/cpuid/cpuid_386.s
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
@@ -40,3 +40,8 @@ TEXT ·asmRdtscpAsm(SB), 7, $0
 	MOVL CX, ecx+8(FP)
 	MOVL DX, edx+12(FP)
 	RET
+
+// func asmDarwinHasAVX512() bool
+TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
+	MOVB $0, ret+0(FP) // the 386 build always returns false; one-byte store matches the bool result
+	RET
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
index 3ba0559e9..bc11f8942 100644
--- a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
@@ -40,3 +40,33 @@ TEXT ·asmRdtscpAsm(SB), 7, $0
 	MOVL CX, ecx+8(FP)
 	MOVL DX, edx+12(FP)
 	RET
+
+// Returns true only when built for darwin/amd64 and the commpage check below passes. From https://go-review.googlesource.com/c/sys/+/285572/
+// func asmDarwinHasAVX512() bool
+TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
+	MOVB $0, ret+0(FP) // default to false
+
+#ifdef GOOS_darwin // return if not darwin
+#ifdef GOARCH_amd64 // return if not amd64
+// These values from:
+// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
+#define commpage64_base_address         0x00007fffffe00000
+#define commpage64_cpu_capabilities64   (commpage64_base_address+0x010)
+#define commpage64_version              (commpage64_base_address+0x01E)
+#define hasAVX512F                      0x0000004000000000
+	MOVQ $commpage64_version, BX            // address of the commpage version field
+	MOVW (BX), AX                           // load the 16-bit version value
+	CMPW AX, $13                            // versions < 13 do not support AVX512
+	JL   no_avx512
+	MOVQ $commpage64_cpu_capabilities64, BX // address of the 64-bit capabilities word
+	MOVQ (BX), AX                           // load the capabilities word
+	MOVQ $hasAVX512F, CX
+	ANDQ CX, AX                             // keep only the hasAVX512F bit
+	JZ   no_avx512                          // bit clear: leave the default false
+	MOVB $1, ret+0(FP)                      // bit set: return true
+
+no_avx512:
+#endif
+#endif
+	RET
+
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_arm64.s b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s
index 8975ee8db..b31d6aec4 100644
--- a/vendor/github.com/klauspost/cpuid/cpuid_arm64.s
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s
@@ -1,6 +1,6 @@
 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build arm64,!gccgo
+//+build arm64,!gccgo,!noasm,!appengine
 
 // See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
 
diff --git a/vendor/github.com/klauspost/cpuid/detect_arm64.go b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
index 923a82618..9a53504a0 100644
--- a/vendor/github.com/klauspost/cpuid/detect_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
@@ -1,9 +1,12 @@
 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build arm64,!gccgo,!noasm,!appengine
+//go:build arm64 && !gccgo && !noasm && !appengine
+// +build arm64,!gccgo,!noasm,!appengine
 
 package cpuid
 
+import "runtime"
+
 func getMidr() (midr uint64)
 func getProcFeatures() (procFeatures uint64)
 func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
@@ -15,14 +18,19 @@ func initCPU() {
 	rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
 }
 
-func addInfo(c *CPUInfo) {
-	// ARM64 disabled for now.
-	if true {
+func addInfo(c *CPUInfo, safe bool) {
+	// Seems to be safe to assume on ARM64
+	c.CacheLine = 64
+	detectOS(c)
+
+	// ARM64 disabled since it may crash if interrupt is not intercepted by OS.
+	if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" {
 		return
 	}
-	// 	midr := getMidr()
+	midr := getMidr()
 
 	// MIDR_EL1 - Main ID Register
+	// https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1
 	//  x--------------------------------------------------x
 	//  | Name                         |  bits   | visible |
 	//  |--------------------------------------------------|
@@ -37,11 +45,70 @@ func addInfo(c *CPUInfo) {
 	//  | Revision                     | [3-0]   |    y    |
 	//  x--------------------------------------------------x
 
-	// 	fmt.Printf(" implementer:  0x%02x\n", (midr>>24)&0xff)
-	// 	fmt.Printf("     variant:   0x%01x\n", (midr>>20)&0xf)
-	// 	fmt.Printf("architecture:   0x%01x\n", (midr>>16)&0xf)
-	// 	fmt.Printf("    part num: 0x%03x\n", (midr>>4)&0xfff)
-	// 	fmt.Printf("    revision:   0x%01x\n", (midr>>0)&0xf)
+	switch (midr >> 24) & 0xff {
+	case 0xC0:
+		c.VendorString = "Ampere Computing"
+		c.VendorID = Ampere
+	case 0x41:
+		c.VendorString = "Arm Limited"
+		c.VendorID = ARM
+	case 0x42:
+		c.VendorString = "Broadcom Corporation"
+		c.VendorID = Broadcom
+	case 0x43:
+		c.VendorString = "Cavium Inc"
+		c.VendorID = Cavium
+	case 0x44:
+		c.VendorString = "Digital Equipment Corporation"
+		c.VendorID = DEC
+	case 0x46:
+		c.VendorString = "Fujitsu Ltd"
+		c.VendorID = Fujitsu
+	case 0x49:
+		c.VendorString = "Infineon Technologies AG"
+		c.VendorID = Infineon
+	case 0x4D:
+		c.VendorString = "Motorola or Freescale Semiconductor Inc"
+		c.VendorID = Motorola
+	case 0x4E:
+		c.VendorString = "NVIDIA Corporation"
+		c.VendorID = NVIDIA
+	case 0x50:
+		c.VendorString = "Applied Micro Circuits Corporation"
+		c.VendorID = AMCC
+	case 0x51:
+		c.VendorString = "Qualcomm Inc"
+		c.VendorID = Qualcomm
+	case 0x56:
+		c.VendorString = "Marvell International Ltd"
+		c.VendorID = Marvell
+	case 0x69:
+		c.VendorString = "Intel Corporation"
+		c.VendorID = Intel
+	}
+
+	// Lower 4 bits: Architecture
+	// Architecture	Meaning
+	// 0b0001		Armv4.
+	// 0b0010		Armv4T.
+	// 0b0011		Armv5 (obsolete).
+	// 0b0100		Armv5T.
+	// 0b0101		Armv5TE.
+	// 0b0110		Armv5TEJ.
+	// 0b0111		Armv6.
+	// 0b1111		Architectural features are individually identified in the ID_* registers, see 'ID registers'.
+	// Upper 4 bit: Variant
+	// An IMPLEMENTATION DEFINED variant number.
+	// Typically, this field is used to distinguish between different product variants, or major revisions of a product.
+	c.Family = int(midr>>16) & 0xff
+
+	// PartNum, bits [15:4]
+	// An IMPLEMENTATION DEFINED primary part number for the device.
+	// On processors implemented by Arm, if the top four bits of the primary
+	// part number are 0x0 or 0x7, the variant and architecture are encoded differently.
+	// Revision, bits [3:0]
+	// An IMPLEMENTATION DEFINED revision number for the device.
+	c.Model = int(midr) & 0xffff
 
 	procFeatures := getProcFeatures()
 
@@ -68,25 +135,18 @@ func addInfo(c *CPUInfo) {
 	// | EL0                          | [3-0]   |    n    |
 	// x--------------------------------------------------x
 
-	var f ArmFlags
+	var f flagSet
 	// if procFeatures&(0xf<<48) != 0 {
 	// 	fmt.Println("DIT")
 	// }
-	if procFeatures&(0xf<<32) != 0 {
-		f |= SVE
-	}
+	f.setIf(procFeatures&(0xf<<32) != 0, SVE)
 	if procFeatures&(0xf<<20) != 15<<20 {
-		f |= ASIMD
-		if procFeatures&(0xf<<20) == 1<<20 {
-			// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
-			// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
-			f |= FPHP
-			f |= ASIMDHP
-		}
-	}
-	if procFeatures&(0xf<<16) != 0 {
-		f |= FP
+		f.set(ASIMD)
+		// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
+		// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
+		f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP)
 	}
+	f.setIf(procFeatures&(0xf<<16) != 0, FP)
 
 	instAttrReg0, instAttrReg1 := getInstAttributes()
 
@@ -127,46 +187,22 @@ func addInfo(c *CPUInfo) {
 	// if instAttrReg0&(0xf<<48) != 0 {
 	// 	fmt.Println("FHM")
 	// }
-	if instAttrReg0&(0xf<<44) != 0 {
-		f |= ASIMDDP
-	}
-	if instAttrReg0&(0xf<<40) != 0 {
-		f |= SM4
-	}
-	if instAttrReg0&(0xf<<36) != 0 {
-		f |= SM3
-	}
-	if instAttrReg0&(0xf<<32) != 0 {
-		f |= SHA3
-	}
-	if instAttrReg0&(0xf<<28) != 0 {
-		f |= ASIMDRDM
-	}
-	if instAttrReg0&(0xf<<20) != 0 {
-		f |= ATOMICS
-	}
-	if instAttrReg0&(0xf<<16) != 0 {
-		f |= CRC32
-	}
-	if instAttrReg0&(0xf<<12) != 0 {
-		f |= SHA2
-	}
-	if instAttrReg0&(0xf<<12) == 2<<12 {
-		// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
-		// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
-		f |= SHA512
-	}
-	if instAttrReg0&(0xf<<8) != 0 {
-		f |= SHA1
-	}
-	if instAttrReg0&(0xf<<4) != 0 {
-		f |= AES
-	}
-	if instAttrReg0&(0xf<<4) == 2<<4 {
-		// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
-		// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
-		f |= PMULL
-	}
+	f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP)
+	f.setIf(instAttrReg0&(0xf<<40) != 0, SM4)
+	f.setIf(instAttrReg0&(0xf<<36) != 0, SM3)
+	f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3)
+	f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM)
+	f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS)
+	f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32)
+	f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2)
+	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+	// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
+	f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512)
+	f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1)
+	f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM)
+	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+	// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
+	f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL)
 
 	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
 	//
@@ -194,26 +230,18 @@ func addInfo(c *CPUInfo) {
 	// if instAttrReg1&(0xf<<28) != 0 {
 	// 	fmt.Println("GPI")
 	// }
-	if instAttrReg1&(0xf<<28) != 24 {
-		f |= GPA
-	}
-	if instAttrReg1&(0xf<<20) != 0 {
-		f |= LRCPC
-	}
-	if instAttrReg1&(0xf<<16) != 0 {
-		f |= FCMA
-	}
-	if instAttrReg1&(0xf<<12) != 0 {
-		f |= JSCVT
-	}
+	f.setIf(instAttrReg1&(0xf<<28) != 24, GPA)
+	f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC)
+	f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA)
+	f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT)
 	// if instAttrReg1&(0xf<<8) != 0 {
 	// 	fmt.Println("API")
 	// }
 	// if instAttrReg1&(0xf<<4) != 0 {
 	// 	fmt.Println("APA")
 	// }
-	if instAttrReg1&(0xf<<0) != 0 {
-		f |= DCPOP
-	}
-	c.Arm = f
+	f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP)
+
+	// Store
+	c.featureSet.or(f)
 }
diff --git a/vendor/github.com/klauspost/cpuid/detect_ref.go b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go
index 970ff3d22..9636c2bc1 100644
--- a/vendor/github.com/klauspost/cpuid/detect_ref.go
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go
@@ -1,6 +1,7 @@
 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build !amd64,!386,!arm64 gccgo noasm appengine
+//go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine
+// +build !amd64,!386,!arm64 gccgo noasm appengine
 
 package cpuid
 
@@ -11,4 +12,4 @@ func initCPU() {
 	rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
 }
 
-func addInfo(info *CPUInfo) {}
+func addInfo(info *CPUInfo, safe bool) {}
diff --git a/vendor/github.com/klauspost/cpuid/detect_intel.go b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
index 363951b3b..c946824ec 100644
--- a/vendor/github.com/klauspost/cpuid/detect_intel.go
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
@@ -1,6 +1,7 @@
 // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
 
-//+build 386,!gccgo,!noasm amd64,!gccgo,!noasm,!appengine
+//go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine)
+// +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine
 
 package cpuid
 
@@ -8,26 +9,28 @@ func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
 func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
 func asmXgetbv(index uint32) (eax, edx uint32)
 func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+func asmDarwinHasAVX512() bool
 
 func initCPU() {
 	cpuid = asmCpuid
 	cpuidex = asmCpuidex
 	xgetbv = asmXgetbv
 	rdtscpAsm = asmRdtscpAsm
+	darwinHasAVX512 = asmDarwinHasAVX512
 }
 
-func addInfo(c *CPUInfo) {
+func addInfo(c *CPUInfo, safe bool) {
 	c.maxFunc = maxFunctionID()
 	c.maxExFunc = maxExtendedFunction()
 	c.BrandName = brandName()
 	c.CacheLine = cacheLine()
-	c.Family, c.Model = familyModel()
-	c.Features = support()
-	c.SGX = hasSGX(c.Features&SGX != 0, c.Features&SGXLC != 0)
+	c.Family, c.Model, c.Stepping = familyModel()
+	c.featureSet = support()
+	c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
 	c.ThreadsPerCore = threadsPerCore()
 	c.LogicalCores = logicalCores()
 	c.PhysicalCores = physicalCores()
 	c.VendorID, c.VendorString = vendorID()
-	c.Hz = hertz(c.BrandName)
 	c.cacheSize()
+	c.frequencies()
 }
diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
new file mode 100644
index 000000000..d12e547c4
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
@@ -0,0 +1,235 @@
+// Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT.
+
+package cpuid
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[ADX-1]
+	_ = x[AESNI-2]
+	_ = x[AMD3DNOW-3]
+	_ = x[AMD3DNOWEXT-4]
+	_ = x[AMXBF16-5]
+	_ = x[AMXINT8-6]
+	_ = x[AMXTILE-7]
+	_ = x[AVX-8]
+	_ = x[AVX2-9]
+	_ = x[AVX512BF16-10]
+	_ = x[AVX512BITALG-11]
+	_ = x[AVX512BW-12]
+	_ = x[AVX512CD-13]
+	_ = x[AVX512DQ-14]
+	_ = x[AVX512ER-15]
+	_ = x[AVX512F-16]
+	_ = x[AVX512FP16-17]
+	_ = x[AVX512IFMA-18]
+	_ = x[AVX512PF-19]
+	_ = x[AVX512VBMI-20]
+	_ = x[AVX512VBMI2-21]
+	_ = x[AVX512VL-22]
+	_ = x[AVX512VNNI-23]
+	_ = x[AVX512VP2INTERSECT-24]
+	_ = x[AVX512VPOPCNTDQ-25]
+	_ = x[AVXSLOW-26]
+	_ = x[AVXVNNI-27]
+	_ = x[BMI1-28]
+	_ = x[BMI2-29]
+	_ = x[CETIBT-30]
+	_ = x[CETSS-31]
+	_ = x[CLDEMOTE-32]
+	_ = x[CLMUL-33]
+	_ = x[CLZERO-34]
+	_ = x[CMOV-35]
+	_ = x[CMPSB_SCADBS_SHORT-36]
+	_ = x[CMPXCHG8-37]
+	_ = x[CPBOOST-38]
+	_ = x[CX16-39]
+	_ = x[ENQCMD-40]
+	_ = x[ERMS-41]
+	_ = x[F16C-42]
+	_ = x[FMA3-43]
+	_ = x[FMA4-44]
+	_ = x[FXSR-45]
+	_ = x[FXSROPT-46]
+	_ = x[GFNI-47]
+	_ = x[HLE-48]
+	_ = x[HRESET-49]
+	_ = x[HTT-50]
+	_ = x[HWA-51]
+	_ = x[HYPERVISOR-52]
+	_ = x[IBPB-53]
+	_ = x[IBS-54]
+	_ = x[IBSBRNTRGT-55]
+	_ = x[IBSFETCHSAM-56]
+	_ = x[IBSFFV-57]
+	_ = x[IBSOPCNT-58]
+	_ = x[IBSOPCNTEXT-59]
+	_ = x[IBSOPSAM-60]
+	_ = x[IBSRDWROPCNT-61]
+	_ = x[IBSRIPINVALIDCHK-62]
+	_ = x[IBS_PREVENTHOST-63]
+	_ = x[INT_WBINVD-64]
+	_ = x[INVLPGB-65]
+	_ = x[LAHF-66]
+	_ = x[LAM-67]
+	_ = x[LBRVIRT-68]
+	_ = x[LZCNT-69]
+	_ = x[MCAOVERFLOW-70]
+	_ = x[MCOMMIT-71]
+	_ = x[MMX-72]
+	_ = x[MMXEXT-73]
+	_ = x[MOVBE-74]
+	_ = x[MOVDIR64B-75]
+	_ = x[MOVDIRI-76]
+	_ = x[MOVSB_ZL-77]
+	_ = x[MPX-78]
+	_ = x[MSRIRC-79]
+	_ = x[MSR_PAGEFLUSH-80]
+	_ = x[NRIPS-81]
+	_ = x[NX-82]
+	_ = x[OSXSAVE-83]
+	_ = x[PCONFIG-84]
+	_ = x[POPCNT-85]
+	_ = x[RDPRU-86]
+	_ = x[RDRAND-87]
+	_ = x[RDSEED-88]
+	_ = x[RDTSCP-89]
+	_ = x[RTM-90]
+	_ = x[RTM_ALWAYS_ABORT-91]
+	_ = x[SERIALIZE-92]
+	_ = x[SEV-93]
+	_ = x[SEV_64BIT-94]
+	_ = x[SEV_ALTERNATIVE-95]
+	_ = x[SEV_DEBUGSWAP-96]
+	_ = x[SEV_ES-97]
+	_ = x[SEV_RESTRICTED-98]
+	_ = x[SEV_SNP-99]
+	_ = x[SGX-100]
+	_ = x[SGXLC-101]
+	_ = x[SHA-102]
+	_ = x[SME-103]
+	_ = x[SME_COHERENT-104]
+	_ = x[SSE-105]
+	_ = x[SSE2-106]
+	_ = x[SSE3-107]
+	_ = x[SSE4-108]
+	_ = x[SSE42-109]
+	_ = x[SSE4A-110]
+	_ = x[SSSE3-111]
+	_ = x[STIBP-112]
+	_ = x[STOSB_SHORT-113]
+	_ = x[SUCCOR-114]
+	_ = x[SVM-115]
+	_ = x[SVMDA-116]
+	_ = x[SVMFBASID-117]
+	_ = x[SVML-118]
+	_ = x[SVMNP-119]
+	_ = x[SVMPF-120]
+	_ = x[SVMPFT-121]
+	_ = x[SYSCALL-122]
+	_ = x[SYSEE-123]
+	_ = x[TBM-124]
+	_ = x[TOPEXT-125]
+	_ = x[TME-126]
+	_ = x[TSCRATEMSR-127]
+	_ = x[TSXLDTRK-128]
+	_ = x[VAES-129]
+	_ = x[VMCBCLEAN-130]
+	_ = x[VMPL-131]
+	_ = x[VMSA_REGPROT-132]
+	_ = x[VMX-133]
+	_ = x[VPCLMULQDQ-134]
+	_ = x[VTE-135]
+	_ = x[WAITPKG-136]
+	_ = x[WBNOINVD-137]
+	_ = x[X87-138]
+	_ = x[XGETBV1-139]
+	_ = x[XOP-140]
+	_ = x[XSAVE-141]
+	_ = x[XSAVEC-142]
+	_ = x[XSAVEOPT-143]
+	_ = x[XSAVES-144]
+	_ = x[AESARM-145]
+	_ = x[ARMCPUID-146]
+	_ = x[ASIMD-147]
+	_ = x[ASIMDDP-148]
+	_ = x[ASIMDHP-149]
+	_ = x[ASIMDRDM-150]
+	_ = x[ATOMICS-151]
+	_ = x[CRC32-152]
+	_ = x[DCPOP-153]
+	_ = x[EVTSTRM-154]
+	_ = x[FCMA-155]
+	_ = x[FP-156]
+	_ = x[FPHP-157]
+	_ = x[GPA-158]
+	_ = x[JSCVT-159]
+	_ = x[LRCPC-160]
+	_ = x[PMULL-161]
+	_ = x[SHA1-162]
+	_ = x[SHA2-163]
+	_ = x[SHA3-164]
+	_ = x[SHA512-165]
+	_ = x[SM3-166]
+	_ = x[SM4-167]
+	_ = x[SVE-168]
+	_ = x[lastID-169]
+	_ = x[firstID-0]
+}
+
+const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXSLOWAVXVNNIBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCX16ENQCMDERMSF16CFMA3FMA4FXSRFXSROPTGFNIHLEHRESETHTTHWAHYPERVISORIBPBIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_PREVENTHOSTINT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCOMMITMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTOPEXTTMETSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
+
+var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 58, 62, 72, 84, 92, 100, 108, 116, 123, 133, 143, 151, 161, 172, 180, 190, 208, 223, 230, 237, 241, 245, 251, 256, 264, 269, 275, 279, 297, 305, 312, 316, 322, 326, 330, 334, 338, 342, 349, 353, 356, 362, 365, 368, 378, 382, 385, 395, 406, 412, 420, 431, 439, 451, 467, 482, 492, 499, 503, 506, 513, 518, 529, 536, 539, 545, 550, 559, 566, 574, 577, 583, 596, 601, 603, 610, 617, 623, 628, 634, 640, 646, 649, 665, 674, 677, 686, 701, 714, 720, 734, 741, 744, 749, 752, 755, 767, 770, 774, 778, 782, 787, 792, 797, 802, 813, 819, 822, 827, 836, 840, 845, 850, 856, 863, 868, 871, 877, 880, 890, 898, 902, 911, 915, 927, 930, 940, 943, 950, 958, 961, 968, 971, 976, 982, 990, 996, 1002, 1010, 1015, 1022, 1029, 1037, 1044, 1049, 1054, 1061, 1065, 1067, 1071, 1074, 1079, 1084, 1089, 1093, 1097, 1101, 1107, 1110, 1113, 1116, 1122}
+
+func (i FeatureID) String() string {
+	if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
+		return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]]
+}
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[VendorUnknown-0]
+	_ = x[Intel-1]
+	_ = x[AMD-2]
+	_ = x[VIA-3]
+	_ = x[Transmeta-4]
+	_ = x[NSC-5]
+	_ = x[KVM-6]
+	_ = x[MSVM-7]
+	_ = x[VMware-8]
+	_ = x[XenHVM-9]
+	_ = x[Bhyve-10]
+	_ = x[Hygon-11]
+	_ = x[SiS-12]
+	_ = x[RDC-13]
+	_ = x[Ampere-14]
+	_ = x[ARM-15]
+	_ = x[Broadcom-16]
+	_ = x[Cavium-17]
+	_ = x[DEC-18]
+	_ = x[Fujitsu-19]
+	_ = x[Infineon-20]
+	_ = x[Motorola-21]
+	_ = x[NVIDIA-22]
+	_ = x[AMCC-23]
+	_ = x[Qualcomm-24]
+	_ = x[Marvell-25]
+	_ = x[lastVendor-26]
+}
+
+const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor"
+
+var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155}
+
+func (i Vendor) String() string {
+	if i < 0 || i >= Vendor(len(_Vendor_index)-1) {
+		return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]]
+}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
new file mode 100644
index 000000000..d91d02109
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
@@ -0,0 +1,121 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+package cpuid
+
+import (
+	"runtime"
+	"strings"
+
+	"golang.org/x/sys/unix"
+)
+
+// detectOS fills c for darwin/arm64 and always returns true; sysctl is only queried when GOOS is not "ios".
+func detectOS(c *CPUInfo) bool {
+	notIOS := runtime.GOOS != "ios"
+	if notIOS {
+		tryToFillCPUInfoFomSysctl(c)
+	}
+	// Mac OS 11.0 has no hw.optional sysctl values for the features below, so their
+	// state cannot be detected dynamically. Assume the set that Apple Silicon M1
+	// supports as the minimum available to all Go programs running on darwin/arm64.
+	// TODO: Add more if we know them.
+	c.featureSet.setIf(notIOS, AESARM, PMULL, SHA1, SHA2)
+	return true
+}
+
+// sysctlGetBool reports whether the uint32 sysctl name can be read and is non-zero.
+func sysctlGetBool(name string) bool {
+	if v, err := unix.SysctlUint32(name); err == nil {
+		return v != 0
+	}
+	return false
+}
+
+// sysctlGetString returns the string sysctl name, or "" when the lookup fails.
+func sysctlGetString(name string) string {
+	if s, err := unix.Sysctl(name); err == nil {
+		return s
+	}
+	return ""
+}
+
+// sysctlGetInt returns, as an int, the first sysctl in names that can be read
+// as a uint32 and is non-zero. It returns unknown when no name qualifies.
+func sysctlGetInt(unknown int, names ...string) int {
+	for _, key := range names {
+		v, err := unix.SysctlUint32(key)
+		// Unreadable and zero-valued entries are both skipped.
+		if err == nil && v != 0 {
+			return int(v)
+		}
+	}
+	return unknown
+}
+
+// sysctlGetInt64 returns, as an int, the first sysctl in names that can be read
+// as a uint64 and differs from unknown. It returns unknown when none does.
+func sysctlGetInt64(unknown int, names ...string) int {
+	for _, key := range names {
+		raw, err := unix.SysctlUint64(key)
+		// Skip unreadable entries and values equal to the fallback.
+		if v := int(raw); err == nil && v != unknown {
+			return v
+		}
+	}
+	return unknown
+}
+
+func setFeature(c *CPUInfo, name string, feature FeatureID) {
+	c.featureSet.setIf(sysctlGetBool(name), feature) // the condition is the boolean value of sysctl name
+}
+// tryToFillCPUInfoFomSysctl fills c's brand, vendor, core-count, cache and feature fields from sysctl values.
+func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
+	c.BrandName = sysctlGetString("machdep.cpu.brand_string")
+	if fields := strings.Fields(c.BrandName); len(fields) != 0 { // empty for a blank or whitespace-only brand string
+		c.VendorString = fields[0]
+	}
+
+	c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu")
+	c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") /
+		sysctlGetInt(1, "hw.physicalcpu") // never 0: sysctlGetInt skips zero values and falls back to 1
+	c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count")
+	c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily")
+	c.Model = sysctlGetInt(0, "machdep.cpu.model")
+	c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize")
+	c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize")
+	c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize")
+	c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
+	c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")
+
+	// from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile
+	setFeature(c, "hw.optional.arm.FEAT_AES", AESARM)
+	setFeature(c, "hw.optional.AdvSIMD", ASIMD)
+	setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP)
+	setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM)
+	setFeature(c, "hw.optional.arm.FEAT_CRC32", CRC32)
+	setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP)
+	// setFeature(c, "", EVTSTRM)
+	setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA)
+	setFeature(c, "hw.optional.arm.FEAT_FP", FP)
+	setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP)
+	setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA)
+	setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT)
+	setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC)
+	setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL)
+	setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1)
+	setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2)
+	setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3)
+	setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512)
+	// setFeature(c, "", SM3)
+	// setFeature(c, "", SM4)
+	setFeature(c, "hw.optional.arm.FEAT_SVE", SVE)
+
+	// from empirical observation
+	setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP)
+	setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS)
+	setFeature(c, "hw.optional.floatingpoint", FP)
+	setFeature(c, "hw.optional.armv8_2_sha3", SHA3)
+	setFeature(c, "hw.optional.armv8_2_sha512", SHA512)
+	setFeature(c, "hw.optional.armv8_3_compnum", FCMA)
+	setFeature(c, "hw.optional.armv8_crc32", CRC32)
+}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
new file mode 100644
index 000000000..ee278b9e4
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
@@ -0,0 +1,130 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file located
+// here https://github.com/golang/sys/blob/master/LICENSE
+
+package cpuid
+
+import (
+	"encoding/binary"
+	"io/ioutil"
+	"runtime"
+)
+
+// Bit masks for the AT_HWCAP auxiliary-vector word; detectOS tests hwcap against them.
+const (
+	hwcap_FP       = 1 << 0
+	hwcap_ASIMD    = 1 << 1
+	hwcap_EVTSTRM  = 1 << 2
+	hwcap_AES      = 1 << 3
+	hwcap_PMULL    = 1 << 4
+	hwcap_SHA1     = 1 << 5
+	hwcap_SHA2     = 1 << 6
+	hwcap_CRC32    = 1 << 7
+	hwcap_ATOMICS  = 1 << 8
+	hwcap_FPHP     = 1 << 9
+	hwcap_ASIMDHP  = 1 << 10
+	hwcap_CPUID    = 1 << 11
+	hwcap_ASIMDRDM = 1 << 12
+	hwcap_JSCVT    = 1 << 13
+	hwcap_FCMA     = 1 << 14
+	hwcap_LRCPC    = 1 << 15
+	hwcap_DCPOP    = 1 << 16
+	hwcap_SHA3     = 1 << 17
+	hwcap_SM3      = 1 << 18
+	hwcap_SM4      = 1 << 19
+	hwcap_ASIMDDP  = 1 << 20
+	hwcap_SHA512   = 1 << 21
+	hwcap_SVE      = 1 << 22
+	hwcap_ASIMDFHM = 1 << 23
+)
+
+// detectOS sets the core counts of c and derives its feature flags from the HWCAP word.
+// It returns false if hwcap is zero and /proc/self/auxv cannot be read or yields no HWCAP value.
+func detectOS(c *CPUInfo) bool {
+	// Hyperthreading is assumed to be absent for now.
+	n := runtime.NumCPU()
+	c.LogicalCores = n
+	c.PhysicalCores = n
+	c.ThreadsPerCore = 1
+
+	if hwcap == 0 {
+		// Nothing came from the runtime, so parse /proc/self/auxv instead.
+		// The constants below are from https://github.com/golang/sys
+		const (
+			_AT_HWCAP  = 16
+			_AT_HWCAP2 = 26
+
+			uintSize = int(32 << (^uint(0) >> 63))
+		)
+
+		auxv, err := ioutil.ReadFile("/proc/self/auxv")
+		if err != nil {
+			// The file can be inaccessible (e.g. on android). The error is
+			// dropped silently and detection is reported as failed.
+			return false
+		}
+		le := binary.LittleEndian
+		// Walk the vector one (tag, value) pair of uintSize-bit words at a time.
+		for len(auxv) >= 2*(uintSize/8) {
+			var tag, val uint
+			if uintSize == 32 {
+				tag = uint(le.Uint32(auxv[0:]))
+				val = uint(le.Uint32(auxv[4:]))
+				auxv = auxv[8:]
+			} else {
+				tag = uint(le.Uint64(auxv[0:]))
+				val = uint(le.Uint64(auxv[8:]))
+				auxv = auxv[16:]
+			}
+			switch tag {
+			case _AT_HWCAP:
+				hwcap = val
+			case _AT_HWCAP2:
+				// Not used
+			}
+		}
+		if hwcap == 0 {
+			return false
+		}
+	}
+
+	// Features are taken from HWCAP rather than from aarch64 system registers,
+	// which older linux kernels do not let user space read.
+	has := func(bit uint) bool { return isSet(hwcap, bit) }
+	c.featureSet.setIf(has(hwcap_AES), AESARM)
+	c.featureSet.setIf(has(hwcap_ASIMD), ASIMD)
+	c.featureSet.setIf(has(hwcap_ASIMDDP), ASIMDDP)
+	c.featureSet.setIf(has(hwcap_ASIMDHP), ASIMDHP)
+	c.featureSet.setIf(has(hwcap_ASIMDRDM), ASIMDRDM)
+	c.featureSet.setIf(has(hwcap_CPUID), ARMCPUID)
+	c.featureSet.setIf(has(hwcap_CRC32), CRC32)
+	c.featureSet.setIf(has(hwcap_DCPOP), DCPOP)
+	c.featureSet.setIf(has(hwcap_EVTSTRM), EVTSTRM)
+	c.featureSet.setIf(has(hwcap_FCMA), FCMA)
+	c.featureSet.setIf(has(hwcap_FP), FP)
+	c.featureSet.setIf(has(hwcap_FPHP), FPHP)
+	c.featureSet.setIf(has(hwcap_JSCVT), JSCVT)
+	c.featureSet.setIf(has(hwcap_LRCPC), LRCPC)
+	c.featureSet.setIf(has(hwcap_PMULL), PMULL)
+	c.featureSet.setIf(has(hwcap_SHA1), SHA1)
+	c.featureSet.setIf(has(hwcap_SHA2), SHA2)
+	c.featureSet.setIf(has(hwcap_SHA3), SHA3)
+	c.featureSet.setIf(has(hwcap_SHA512), SHA512)
+	c.featureSet.setIf(has(hwcap_SM3), SM3)
+	c.featureSet.setIf(has(hwcap_SM4), SM4)
+	c.featureSet.setIf(has(hwcap_SVE), SVE)
+
+	// The Samsung S9+ kernel reports support for atomics, but not all cores
+	// actually support them, resulting in SIGILL. See issue #28431.
+	// TODO(elias.naur): Only disable the optimization on bad chipsets on android.
+	c.featureSet.setIf(runtime.GOOS != "android" && has(hwcap_ATOMICS), ATOMICS)
+
+	return true
+}
+
+func isSet(hwc uint, value uint) bool {
+	return hwc&value > 0 // true when hwc and value share at least one set bit
+}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
new file mode 100644
index 000000000..8733ba343
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
@@ -0,0 +1,16 @@
+// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build arm64 && !linux && !darwin
+// +build arm64,!linux,!darwin
+
+package cpuid
+
+import "runtime"
+
+// detectOS records core counts only and returns false; no feature flags are set here.
+func detectOS(c *CPUInfo) bool {
+	// For now assuming 1 thread per core...
+	n := runtime.NumCPU()
+	c.PhysicalCores, c.LogicalCores, c.ThreadsPerCore = n, n, 1
+	return false
+}
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
new file mode 100644
index 000000000..f8f201b5f
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
@@ -0,0 +1,8 @@
+// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build nounsafe
+// +build nounsafe
+
+package cpuid
+
+var hwcap uint // not linked to the runtime under the nounsafe tag; stays zero until detectOS assigns it
diff --git a/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
new file mode 100644
index 000000000..92af622eb
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
@@ -0,0 +1,11 @@
+// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
+
+//go:build !nounsafe
+// +build !nounsafe
+
+package cpuid
+
+import _ "unsafe" // needed for go:linkname
+
+//go:linkname hwcap internal/cpu.HWCap
+var hwcap uint // bound to internal/cpu.HWCap by the linkname directive above
diff --git a/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
new file mode 100644
index 000000000..471d986d2
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Build the package for every GOOS/GOARCH pair under each build-tag combination.
+set -e
+
+go tool dist list | while IFS=/ read -r os arch; do
+    echo "Checking $os/$arch..."
+    echo " normal"
+    GOARCH="$arch" GOOS="$os" go build -o /dev/null .
+    echo " noasm"
+    GOARCH="$arch" GOOS="$os" go build -tags noasm -o /dev/null .
+    echo " appengine"
+    GOARCH="$arch" GOOS="$os" go build -tags appengine -o /dev/null .
+    echo " noasm,appengine"
+    GOARCH="$arch" GOOS="$os" go build -tags 'appengine noasm' -o /dev/null .
+done