summaryrefslogtreecommitdiffstats
path: root/vendor/github.com/klauspost/cpuid/v2/cpuid.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/cpuid/v2/cpuid.go')
-rw-r--r--vendor/github.com/klauspost/cpuid/v2/cpuid.go1023
1 files changed, 1023 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
new file mode 100644
index 00000000..43e9cc17
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@@ -0,0 +1,1023 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// Package cpuid provides information about the CPU running the current program.
+//
+// CPU features are detected on startup, and kept for fast access through the life of the application.
+// Currently x86 / x64 (AMD64) as well as arm64 is supported.
+//
+// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+//
+// Package home: https://github.com/klauspost/cpuid
+package cpuid
+
+import (
+ "flag"
+ "fmt"
+ "math"
+ "os"
+ "runtime"
+ "strings"
+)
+
+// AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf
+// and Processor Programming Reference (PPR)
+
+// Vendor is a representation of a CPU vendor.
+type Vendor int
+
+const (
+ VendorUnknown Vendor = iota
+ Intel
+ AMD
+ VIA
+ Transmeta
+ NSC
+ KVM // Kernel-based Virtual Machine
+ MSVM // Microsoft Hyper-V or Windows Virtual PC
+ VMware
+ XenHVM
+ Bhyve
+ Hygon
+ SiS
+ RDC
+
+ Ampere
+ ARM
+ Broadcom
+ Cavium
+ DEC
+ Fujitsu
+ Infineon
+ Motorola
+ NVIDIA
+ AMCC
+ Qualcomm
+ Marvell
+
+ lastVendor
+)
+
+//go:generate stringer -type=FeatureID,Vendor
+
+// FeatureID is the ID of a specific cpu feature.
+type FeatureID int
+
+const (
+ // Keep index -1 as unknown
+ UNKNOWN = -1
+
+ // Add features
+ ADX FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+ AESNI // Advanced Encryption Standard New Instructions
+ AMD3DNOW // AMD 3DNOW
+ AMD3DNOWEXT // AMD 3DNowExt
+ AMXBF16 // Tile computational operations on BFLOAT16 numbers
+ AMXINT8 // Tile computational operations on 8-bit integers
+ AMXTILE // Tile architecture
+ AVX // AVX functions
+ AVX2 // AVX2 functions
+ AVX512BF16 // AVX-512 BFLOAT16 Instructions
+ AVX512BITALG // AVX-512 Bit Algorithms
+ AVX512BW // AVX-512 Byte and Word Instructions
+ AVX512CD // AVX-512 Conflict Detection Instructions
+ AVX512DQ // AVX-512 Doubleword and Quadword Instructions
+ AVX512ER // AVX-512 Exponential and Reciprocal Instructions
+ AVX512F // AVX-512 Foundation
+ AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
+ AVX512PF // AVX-512 Prefetch Instructions
+ AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
+ AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
+ AVX512VL // AVX-512 Vector Length Extensions
+ AVX512VNNI // AVX-512 Vector Neural Network Instructions
+ AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
+ AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
+ AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one.
+ BMI1 // Bit Manipulation Instruction Set 1
+ BMI2 // Bit Manipulation Instruction Set 2
+ CLDEMOTE // Cache Line Demote
+ CLMUL // Carry-less Multiplication
+ CMOV // i686 CMOV
+ CX16 // CMPXCHG16B Instruction
+ ENQCMD // Enqueue Command
+ ERMS // Enhanced REP MOVSB/STOSB
+ F16C // Half-precision floating-point conversion
+ FMA3 // Intel FMA 3. Does not imply AVX.
+ FMA4 // Bulldozer FMA4 functions
+ GFNI // Galois Field New Instructions
+ HLE // Hardware Lock Elision
+ HTT // Hyperthreading (enabled)
+ HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
+ IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+ IBS // Instruction Based Sampling (AMD)
+ IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
+ IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
+ IBSFFV // Instruction Based Sampling Feature (AMD)
+ IBSOPCNT // Instruction Based Sampling Feature (AMD)
+ IBSOPCNTEXT // Instruction Based Sampling Feature (AMD)
+ IBSOPSAM // Instruction Based Sampling Feature (AMD)
+ IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
+ IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
+ LZCNT // LZCNT instruction
+ MMX // standard MMX
+ MMXEXT // SSE integer functions or AMD MMX ext
+ MOVDIR64B // Move 64 Bytes as Direct Store
+ MOVDIRI // Move Doubleword as Direct Store
+ MPX // Intel MPX (Memory Protection Extensions)
+ NX // NX (No-Execute) bit
+ POPCNT // POPCNT instruction
+ RDRAND // RDRAND instruction is available
+ RDSEED // RDSEED instruction is available
+ RDTSCP // RDTSCP Instruction
+ RTM // Restricted Transactional Memory
+ SERIALIZE // Serialize Instruction Execution
+ SGX // Software Guard Extensions
+ SGXLC // Software Guard Extensions Launch Control
+ SHA // Intel SHA Extensions
+ SSE // SSE functions
+ SSE2 // P4 SSE functions
+ SSE3 // Prescott SSE3 functions
+ SSE4 // Penryn SSE4.1 functions
+ SSE42 // Nehalem SSE4.2 functions
+ SSE4A // AMD Barcelona microarchitecture SSE4a instructions
+ SSSE3 // Conroe SSSE3 functions
+ STIBP // Single Thread Indirect Branch Predictors
+ TBM // AMD Trailing Bit Manipulation
+ TSXLDTRK // Intel TSX Suspend Load Address Tracking
+ VAES // Vector AES
+ VMX // Virtual Machine Extensions
+ VPCLMULQDQ // Carry-Less Multiplication Quadword
+ WAITPKG // TPAUSE, UMONITOR, UMWAIT
+ WBNOINVD // Write Back and Do Not Invalidate Cache
+ XOP // Bulldozer XOP functions
+
+ // ARM features:
+ AESARM // AES instructions
+ ARMCPUID // Some CPU ID registers readable at user-level
+ ASIMD // Advanced SIMD
+ ASIMDDP // SIMD Dot Product
+ ASIMDHP // Advanced SIMD half-precision floating point
+ ASIMDRDM // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
+ ATOMICS // Large System Extensions (LSE)
+ CRC32 // CRC32/CRC32C instructions
+ DCPOP // Data cache clean to Point of Persistence (DC CVAP)
+ EVTSTRM // Generic timer
+ FCMA // Floatin point complex number addition and multiplication
+ FP // Single-precision and double-precision floating point
+ FPHP // Half-precision floating point
+ GPA // Generic Pointer Authentication
+ JSCVT // Javascript-style double->int convert (FJCVTZS)
+ LRCPC // Weaker release consistency (LDAPR, etc)
+ PMULL // Polynomial Multiply instructions (PMULL/PMULL2)
+ SHA1 // SHA-1 instructions (SHA1C, etc)
+ SHA2 // SHA-2 instructions (SHA256H, etc)
+ SHA3 // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
+ SHA512 // SHA512 instructions
+ SM3 // SM3 instructions
+ SM4 // SM4 instructions
+ SVE // Scalable Vector Extension
+
+ // Keep it last. It automatically defines the size of []flagSet
+ lastID
+
+ firstID FeatureID = UNKNOWN + 1
+)
+
+// CPUInfo contains information about the detected system CPU.
+type CPUInfo struct {
+ BrandName string // Brand name reported by the CPU
+ VendorID Vendor // Comparable CPU vendor ID
+ VendorString string // Raw vendor string.
+ featureSet flagSet // Features of the CPU
+ PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
+ ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
+ LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
+ Family int // CPU family number
+ Model int // CPU model number
+ CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
+ Hz int64 // Clock speed, if known, 0 otherwise
+ Cache struct {
+ L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
+ L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
+ L2 int // L2 Cache (per core or shared). Will be -1 if undetected
+ L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
+ }
+ SGX SGXSupport
+ maxFunc uint32
+ maxExFunc uint32
+}
+
+var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
+var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+var xgetbv func(index uint32) (eax, edx uint32)
+var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
+var darwinHasAVX512 = func() bool { return false }
+
+// CPU contains information about the CPU as detected on startup,
+// or when Detect last was called.
+//
+// Use this as the primary entry point to you data.
+var CPU CPUInfo
+
+func init() {
+ initCPU()
+ Detect()
+}
+
+// Detect will re-detect current CPU info.
+// This will replace the content of the exported CPU variable.
+//
+// Unless you expect the CPU to change while you are running your program
+// you should not need to call this function.
+// If you call this, you must ensure that no other goroutine is accessing the
+// exported CPU variable.
+func Detect() {
+ // Set defaults
+ CPU.ThreadsPerCore = 1
+ CPU.Cache.L1I = -1
+ CPU.Cache.L1D = -1
+ CPU.Cache.L2 = -1
+ CPU.Cache.L3 = -1
+ safe := true
+ if detectArmFlag != nil {
+ safe = !*detectArmFlag
+ }
+ addInfo(&CPU, safe)
+ if displayFeats != nil && *displayFeats {
+ fmt.Println("cpu features:", strings.Join(CPU.FeatureSet(), ","))
+ // Exit with non-zero so tests will print value.
+ os.Exit(1)
+ }
+ if disableFlag != nil {
+ s := strings.Split(*disableFlag, ",")
+ for _, feat := range s {
+ feat := ParseFeature(strings.TrimSpace(feat))
+ if feat != UNKNOWN {
+ CPU.featureSet.unset(feat)
+ }
+ }
+ }
+}
+
+// DetectARM will detect ARM64 features.
+// This is NOT done automatically since it can potentially crash
+// if the OS does not handle the command.
+// If in the future this can be done safely this function may not
+// do anything.
+func DetectARM() {
+ addInfo(&CPU, false)
+}
+
+var detectArmFlag *bool
+var displayFeats *bool
+var disableFlag *string
+
+// Flags will enable flags.
+// This must be called *before* flag.Parse AND
+// Detect must be called after the flags have been parsed.
+// Note that this means that any detection used in init() functions
+// will not contain these flags.
+func Flags() {
+ disableFlag = flag.String("cpu.disable", "", "disable cpu features; comma separated list")
+ displayFeats = flag.Bool("cpu.features", false, "lists cpu features and exits")
+ detectArmFlag = flag.Bool("cpu.arm", false, "allow ARM features to be detected; can potentially crash")
+}
+
+// Supports returns whether the CPU supports all of the requested features.
+func (c CPUInfo) Supports(ids ...FeatureID) bool {
+ for _, id := range ids {
+ if !c.featureSet.inSet(id) {
+ return false
+ }
+ }
+ return true
+}
+
+// Has allows for checking a single feature.
+// Should be inlined by the compiler.
+func (c CPUInfo) Has(id FeatureID) bool {
+ return c.featureSet.inSet(id)
+}
+
+// Disable will disable one or several features.
+func (c *CPUInfo) Disable(ids ...FeatureID) bool {
+ for _, id := range ids {
+ c.featureSet.unset(id)
+ }
+ return true
+}
+
+// Enable will disable one or several features even if they were undetected.
+// This is of course not recommended for obvious reasons.
+func (c *CPUInfo) Enable(ids ...FeatureID) bool {
+ for _, id := range ids {
+ c.featureSet.set(id)
+ }
+ return true
+}
+
+// IsVendor returns true if vendor is recognized as Intel
+func (c CPUInfo) IsVendor(v Vendor) bool {
+ return c.VendorID == v
+}
+
+func (c CPUInfo) FeatureSet() []string {
+ s := make([]string, 0)
+ for _, f := range c.featureSet.Strings() {
+ s = append(s, f)
+ }
+ return s
+}
+
+// RTCounter returns the 64-bit time-stamp counter
+// Uses the RDTSCP instruction. The value 0 is returned
+// if the CPU does not support the instruction.
+func (c CPUInfo) RTCounter() uint64 {
+ if !c.Supports(RDTSCP) {
+ return 0
+ }
+ a, _, _, d := rdtscpAsm()
+ return uint64(a) | (uint64(d) << 32)
+}
+
+// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
+// This variable is OS dependent, but on Linux contains information
+// about the current cpu/core the code is running on.
+// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
+func (c CPUInfo) Ia32TscAux() uint32 {
+ if !c.Supports(RDTSCP) {
+ return 0
+ }
+ _, _, ecx, _ := rdtscpAsm()
+ return ecx
+}
+
+// LogicalCPU will return the Logical CPU the code is currently executing on.
+// This is likely to change when the OS re-schedules the running thread
+// to another CPU.
+// If the current core cannot be detected, -1 will be returned.
+func (c CPUInfo) LogicalCPU() int {
+ if c.maxFunc < 1 {
+ return -1
+ }
+ _, ebx, _, _ := cpuid(1)
+ return int(ebx >> 24)
+}
+
+// hertz tries to compute the clock speed of the CPU. If leaf 15 is
+// supported, use it, otherwise parse the brand string. Yes, really.
+func hertz(model string) int64 {
+ mfi := maxFunctionID()
+ if mfi >= 0x15 {
+ eax, ebx, ecx, _ := cpuid(0x15)
+ if eax != 0 && ebx != 0 && ecx != 0 {
+ return int64((int64(ecx) * int64(ebx)) / int64(eax))
+ }
+ }
+ // computeHz determines the official rated speed of a CPU from its brand
+ // string. This insanity is *actually the official documented way to do
+ // this according to Intel*, prior to leaf 0x15 existing. The official
+ // documentation only shows this working for exactly `x.xx` or `xxxx`
+ // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
+ // sizes.
+ hz := strings.LastIndex(model, "Hz")
+ if hz < 3 {
+ return 0
+ }
+ var multiplier int64
+ switch model[hz-1] {
+ case 'M':
+ multiplier = 1000 * 1000
+ case 'G':
+ multiplier = 1000 * 1000 * 1000
+ case 'T':
+ multiplier = 1000 * 1000 * 1000 * 1000
+ }
+ if multiplier == 0 {
+ return 0
+ }
+ freq := int64(0)
+ divisor := int64(0)
+ decimalShift := int64(1)
+ var i int
+ for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
+ if model[i] >= '0' && model[i] <= '9' {
+ freq += int64(model[i]-'0') * decimalShift
+ decimalShift *= 10
+ } else if model[i] == '.' {
+ if divisor != 0 {
+ return 0
+ }
+ divisor = decimalShift
+ } else {
+ return 0
+ }
+ }
+ // we didn't find a space
+ if i < 0 {
+ return 0
+ }
+ if divisor != 0 {
+ return (freq * multiplier) / divisor
+ }
+ return freq * multiplier
+}
+
+// VM Will return true if the cpu id indicates we are in
+// a virtual machine.
+func (c CPUInfo) VM() bool {
+ return CPU.featureSet.inSet(HYPERVISOR)
+}
+
+// flags contains detected cpu features and characteristics
+type flags uint64
+
+// log2(bits_in_uint64)
+const flagBitsLog2 = 6
+const flagBits = 1 << flagBitsLog2
+const flagMask = flagBits - 1
+
+// flagSet contains detected cpu features and characteristics in an array of flags
+type flagSet [(lastID + flagMask) / flagBits]flags
+
+func (s flagSet) inSet(feat FeatureID) bool {
+ return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
+}
+
+func (s *flagSet) set(feat FeatureID) {
+ s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
+}
+
+// setIf will set a feature if boolean is true.
+func (s *flagSet) setIf(cond bool, features ...FeatureID) {
+ if cond {
+ for _, offset := range features {
+ s[offset>>flagBitsLog2] |= 1 << (offset & flagMask)
+ }
+ }
+}
+
+func (s *flagSet) unset(offset FeatureID) {
+ bit := flags(1 << (offset & flagMask))
+ s[offset>>flagBitsLog2] = s[offset>>flagBitsLog2] & ^bit
+}
+
+// or with another flagset.
+func (s *flagSet) or(other flagSet) {
+ for i, v := range other[:] {
+ s[i] |= v
+ }
+}
+
+// ParseFeature will parse the string and return the ID of the matching feature.
+// Will return UNKNOWN if not found.
+func ParseFeature(s string) FeatureID {
+ s = strings.ToUpper(s)
+ for i := firstID; i < lastID; i++ {
+ if i.String() == s {
+ return i
+ }
+ }
+ return UNKNOWN
+}
+
+// Strings returns an array of the detected features for FlagsSet.
+func (s flagSet) Strings() []string {
+ if len(s) == 0 {
+ return []string{""}
+ }
+ r := make([]string, 0)
+ for i := firstID; i < lastID; i++ {
+ if s.inSet(i) {
+ r = append(r, i.String())
+ }
+ }
+ return r
+}
+
+func maxExtendedFunction() uint32 {
+ eax, _, _, _ := cpuid(0x80000000)
+ return eax
+}
+
+func maxFunctionID() uint32 {
+ a, _, _, _ := cpuid(0)
+ return a
+}
+
+func brandName() string {
+ if maxExtendedFunction() >= 0x80000004 {
+ v := make([]uint32, 0, 48)
+ for i := uint32(0); i < 3; i++ {
+ a, b, c, d := cpuid(0x80000002 + i)
+ v = append(v, a, b, c, d)
+ }
+ return strings.Trim(string(valAsString(v...)), " ")
+ }
+ return "unknown"
+}
+
+func threadsPerCore() int {
+ mfi := maxFunctionID()
+ vend, _ := vendorID()
+
+ if mfi < 0x4 || (vend != Intel && vend != AMD) {
+ return 1
+ }
+
+ if mfi < 0xb {
+ if vend != Intel {
+ return 1
+ }
+ _, b, _, d := cpuid(1)
+ if (d & (1 << 28)) != 0 {
+ // v will contain logical core count
+ v := (b >> 16) & 255
+ if v > 1 {
+ a4, _, _, _ := cpuid(4)
+ // physical cores
+ v2 := (a4 >> 26) + 1
+ if v2 > 0 {
+ return int(v) / int(v2)
+ }
+ }
+ }
+ return 1
+ }
+ _, b, _, _ := cpuidex(0xb, 0)
+ if b&0xffff == 0 {
+ if vend == AMD {
+ // Workaround for AMD returning 0, assume 2 if >= Zen 2
+ // It will be more correct than not.
+ fam, _ := familyModel()
+ _, _, _, d := cpuid(1)
+ if (d&(1<<28)) != 0 && fam >= 23 {
+ return 2
+ }
+ }
+ return 1
+ }
+ return int(b & 0xffff)
+}
+
+func logicalCores() int {
+ mfi := maxFunctionID()
+ v, _ := vendorID()
+ switch v {
+ case Intel:
+ // Use this on old Intel processors
+ if mfi < 0xb {
+ if mfi < 1 {
+ return 0
+ }
+ // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
+ // that can be assigned to logical processors in a physical package.
+ // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
+ _, ebx, _, _ := cpuid(1)
+ logical := (ebx >> 16) & 0xff
+ return int(logical)
+ }
+ _, b, _, _ := cpuidex(0xb, 1)
+ return int(b & 0xffff)
+ case AMD, Hygon:
+ _, b, _, _ := cpuid(1)
+ return int((b >> 16) & 0xff)
+ default:
+ return 0
+ }
+}
+
+func familyModel() (int, int) {
+ if maxFunctionID() < 0x1 {
+ return 0, 0
+ }
+ eax, _, _, _ := cpuid(1)
+ family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
+ model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
+ return int(family), int(model)
+}
+
+func physicalCores() int {
+ v, _ := vendorID()
+ switch v {
+ case Intel:
+ return logicalCores() / threadsPerCore()
+ case AMD, Hygon:
+ lc := logicalCores()
+ tpc := threadsPerCore()
+ if lc > 0 && tpc > 0 {
+ return lc / tpc
+ }
+
+ // The following is inaccurate on AMD EPYC 7742 64-Core Processor
+ if maxExtendedFunction() >= 0x80000008 {
+ _, _, c, _ := cpuid(0x80000008)
+ if c&0xff > 0 {
+ return int(c&0xff) + 1
+ }
+ }
+ }
+ return 0
+}
+
+// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
+var vendorMapping = map[string]Vendor{
+ "AMDisbetter!": AMD,
+ "AuthenticAMD": AMD,
+ "CentaurHauls": VIA,
+ "GenuineIntel": Intel,
+ "TransmetaCPU": Transmeta,
+ "GenuineTMx86": Transmeta,
+ "Geode by NSC": NSC,
+ "VIA VIA VIA ": VIA,
+ "KVMKVMKVMKVM": KVM,
+ "Microsoft Hv": MSVM,
+ "VMwareVMware": VMware,
+ "XenVMMXenVMM": XenHVM,
+ "bhyve bhyve ": Bhyve,
+ "HygonGenuine": Hygon,
+ "Vortex86 SoC": SiS,
+ "SiS SiS SiS ": SiS,
+ "RiseRiseRise": SiS,
+ "Genuine RDC": RDC,
+}
+
+func vendorID() (Vendor, string) {
+ _, b, c, d := cpuid(0)
+ v := string(valAsString(b, d, c))
+ vend, ok := vendorMapping[v]
+ if !ok {
+ return VendorUnknown, v
+ }
+ return vend, v
+}
+
+func cacheLine() int {
+ if maxFunctionID() < 0x1 {
+ return 0
+ }
+
+ _, ebx, _, _ := cpuid(1)
+ cache := (ebx & 0xff00) >> 5 // cflush size
+ if cache == 0 && maxExtendedFunction() >= 0x80000006 {
+ _, _, ecx, _ := cpuid(0x80000006)
+ cache = ecx & 0xff // cacheline size
+ }
+ // TODO: Read from Cache and TLB Information
+ return int(cache)
+}
+
+func (c *CPUInfo) cacheSize() {
+ c.Cache.L1D = -1
+ c.Cache.L1I = -1
+ c.Cache.L2 = -1
+ c.Cache.L3 = -1
+ vendor, _ := vendorID()
+ switch vendor {
+ case Intel:
+ if maxFunctionID() < 4 {
+ return
+ }
+ for i := uint32(0); ; i++ {
+ eax, ebx, ecx, _ := cpuidex(4, i)
+ cacheType := eax & 15
+ if cacheType == 0 {
+ break
+ }
+ cacheLevel := (eax >> 5) & 7
+ coherency := int(ebx&0xfff) + 1
+ partitions := int((ebx>>12)&0x3ff) + 1
+ associativity := int((ebx>>22)&0x3ff) + 1
+ sets := int(ecx) + 1
+ size := associativity * partitions * coherency * sets
+ switch cacheLevel {
+ case 1:
+ if cacheType == 1 {
+ // 1 = Data Cache
+ c.Cache.L1D = size
+ } else if cacheType == 2 {
+ // 2 = Instruction Cache
+ c.Cache.L1I = size
+ } else {
+ if c.Cache.L1D < 0 {
+ c.Cache.L1I = size
+ }
+ if c.Cache.L1I < 0 {
+ c.Cache.L1I = size
+ }
+ }
+ case 2:
+ c.Cache.L2 = size
+ case 3:
+ c.Cache.L3 = size
+ }
+ }
+ case AMD, Hygon:
+ // Untested.
+ if maxExtendedFunction() < 0x80000005 {
+ return
+ }
+ _, _, ecx, edx := cpuid(0x80000005)
+ c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
+ c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
+
+ if maxExtendedFunction() < 0x80000006 {
+ return
+ }
+ _, _, ecx, _ = cpuid(0x80000006)
+ c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
+
+ // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
+ if maxExtendedFunction() < 0x8000001D {
+ return
+ }
+ for i := uint32(0); i < math.MaxUint32; i++ {
+ eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
+
+ level := (eax >> 5) & 7
+ cacheNumSets := ecx + 1
+ cacheLineSize := 1 + (ebx & 2047)
+ cachePhysPartitions := 1 + ((ebx >> 12) & 511)
+ cacheNumWays := 1 + ((ebx >> 22) & 511)
+
+ typ := eax & 15
+ size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
+ if typ == 0 {
+ return
+ }
+
+ switch level {
+ case 1:
+ switch typ {
+ case 1:
+ // Data cache
+ c.Cache.L1D = size
+ case 2:
+ // Inst cache
+ c.Cache.L1I = size
+ default:
+ if c.Cache.L1D < 0 {
+ c.Cache.L1I = size
+ }
+ if c.Cache.L1I < 0 {
+ c.Cache.L1I = size
+ }
+ }
+ case 2:
+ c.Cache.L2 = size
+ case 3:
+ c.Cache.L3 = size
+ }
+ }
+ }
+
+ return
+}
+
+type SGXEPCSection struct {
+ BaseAddress uint64
+ EPCSize uint64
+}
+
+type SGXSupport struct {
+ Available bool
+ LaunchControl bool
+ SGX1Supported bool
+ SGX2Supported bool
+ MaxEnclaveSizeNot64 int64
+ MaxEnclaveSize64 int64
+ EPCSections []SGXEPCSection
+}
+
+func hasSGX(available, lc bool) (rval SGXSupport) {
+ rval.Available = available
+
+ if !available {
+ return
+ }
+
+ rval.LaunchControl = lc
+
+ a, _, _, d := cpuidex(0x12, 0)
+ rval.SGX1Supported = a&0x01 != 0
+ rval.SGX2Supported = a&0x02 != 0
+ rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
+ rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
+ rval.EPCSections = make([]SGXEPCSection, 0)
+
+ for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
+ eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
+ leafType := eax & 0xf
+
+ if leafType == 0 {
+ // Invalid subleaf, stop iterating
+ break
+ } else if leafType == 1 {
+ // EPC Section subleaf
+ baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
+ size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
+
+ section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
+ rval.EPCSections = append(rval.EPCSections, section)
+ }
+ }
+
+ return
+}
+
+func support() flagSet {
+ var fs flagSet
+ mfi := maxFunctionID()
+ vend, _ := vendorID()
+ if mfi < 0x1 {
+ return fs
+ }
+ family, model := familyModel()
+
+ _, _, c, d := cpuid(1)
+ fs.setIf((d&(1<<15)) != 0, CMOV)
+ fs.setIf((d&(1<<23)) != 0, MMX)
+ fs.setIf((d&(1<<25)) != 0, MMXEXT)
+ fs.setIf((d&(1<<25)) != 0, SSE)
+ fs.setIf((d&(1<<26)) != 0, SSE2)
+ fs.setIf((c&1) != 0, SSE3)
+ fs.setIf((c&(1<<5)) != 0, VMX)
+ fs.setIf((c&0x00000200) != 0, SSSE3)
+ fs.setIf((c&0x00080000) != 0, SSE4)
+ fs.setIf((c&0x00100000) != 0, SSE42)
+ fs.setIf((c&(1<<25)) != 0, AESNI)
+ fs.setIf((c&(1<<1)) != 0, CLMUL)
+ fs.setIf(c&(1<<23) != 0, POPCNT)
+ fs.setIf(c&(1<<30) != 0, RDRAND)
+
+ // This bit has been reserved by Intel & AMD for use by hypervisors,
+ // and indicates the presence of a hypervisor.
+ fs.setIf(c&(1<<31) != 0, HYPERVISOR)
+ fs.setIf(c&(1<<29) != 0, F16C)
+ fs.setIf(c&(1<<13) != 0, CX16)
+
+ if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
+ fs.setIf(threadsPerCore() > 1, HTT)
+ }
+ if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
+ fs.setIf(threadsPerCore() > 1, HTT)
+ }
+ // Check XGETBV/XSAVE (26), OXSAVE (27) and AVX (28) bits
+ const avxCheck = 1<<26 | 1<<27 | 1<<28
+ if c&avxCheck == avxCheck {
+ // Check for OS support
+ eax, _ := xgetbv(0)
+ if (eax & 0x6) == 0x6 {
+ fs.set(AVX)
+ switch vend {
+ case Intel:
+ // Older than Haswell.
+ fs.setIf(family == 6 && model < 60, AVXSLOW)
+ case AMD:
+ // Older than Zen 2
+ fs.setIf(family < 23 || (family == 23 && model < 49), AVXSLOW)
+ }
+ }
+ }
+ // FMA3 can be used with SSE registers, so no OS support is strictly needed.
+ // fma3 and OSXSAVE needed.
+ const fma3Check = 1<<12 | 1<<27
+ fs.setIf(c&fma3Check == fma3Check, FMA3)
+
+ // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
+ if mfi >= 7 {
+ _, ebx, ecx, edx := cpuidex(7, 0)
+ eax1, _, _, _ := cpuidex(7, 1)
+ if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
+ fs.set(AVX2)
+ }
+ // CPUID.(EAX=7, ECX=0).EBX
+ if (ebx & 0x00000008) != 0 {
+ fs.set(BMI1)
+ fs.setIf((ebx&0x00000100) != 0, BMI2)
+ }
+ fs.setIf(ebx&(1<<2) != 0, SGX)
+ fs.setIf(ebx&(1<<4) != 0, HLE)
+ fs.setIf(ebx&(1<<9) != 0, ERMS)
+ fs.setIf(ebx&(1<<11) != 0, RTM)
+ fs.setIf(ebx&(1<<14) != 0, MPX)
+ fs.setIf(ebx&(1<<18) != 0, RDSEED)
+ fs.setIf(ebx&(1<<19) != 0, ADX)
+ fs.setIf(ebx&(1<<29) != 0, SHA)
+ // CPUID.(EAX=7, ECX=0).ECX
+ fs.setIf(ecx&(1<<5) != 0, WAITPKG)
+ fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
+ fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
+ fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
+ fs.setIf(ecx&(1<<29) != 0, ENQCMD)
+ fs.setIf(ecx&(1<<30) != 0, SGXLC)
+ // CPUID.(EAX=7, ECX=0).EDX
+ fs.setIf(edx&(1<<14) != 0, SERIALIZE)
+ fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
+ fs.setIf(edx&(1<<26) != 0, IBPB)
+ fs.setIf(edx&(1<<27) != 0, STIBP)
+
+ // Only detect AVX-512 features if XGETBV is supported
+ if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
+ // Check for OS support
+ eax, _ := xgetbv(0)
+
+ // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+ // ZMM16-ZMM31 state are enabled by OS)
+ /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
+ hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
+ if runtime.GOOS == "darwin" {
+ hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
+ }
+ if hasAVX512 {
+ fs.setIf(ebx&(1<<16) != 0, AVX512F)
+ fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
+ fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
+ fs.setIf(ebx&(1<<26) != 0, AVX512PF)
+ fs.setIf(ebx&(1<<27) != 0, AVX512ER)
+ fs.setIf(ebx&(1<<28) != 0, AVX512CD)
+ fs.setIf(ebx&(1<<30) != 0, AVX512BW)
+ fs.setIf(ebx&(1<<31) != 0, AVX512VL)
+ // ecx
+ fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
+ fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
+ fs.setIf(ecx&(1<<8) != 0, GFNI)
+ fs.setIf(ecx&(1<<9) != 0, VAES)
+ fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
+ fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
+ fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
+ fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
+ // edx
+ fs.setIf(edx&(1<<8) != 0, AVX512VP2INTERSECT)
+ fs.setIf(edx&(1<<22) != 0, AMXBF16)
+ fs.setIf(edx&(1<<24) != 0, AMXTILE)
+ fs.setIf(edx&(1<<25) != 0, AMXINT8)
+ // eax1 = CPUID.(EAX=7, ECX=1).EAX
+ fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
+ }
+ }
+ }
+
+ if maxExtendedFunction() >= 0x80000001 {
+ _, _, c, d := cpuid(0x80000001)
+ if (c & (1 << 5)) != 0 {
+ fs.set(LZCNT)
+ fs.set(POPCNT)
+ }
+ fs.setIf((c&(1<<10)) != 0, IBS)
+ fs.setIf((d&(1<<31)) != 0, AMD3DNOW)
+ fs.setIf((d&(1<<30)) != 0, AMD3DNOWEXT)
+ fs.setIf((d&(1<<23)) != 0, MMX)
+ fs.setIf((d&(1<<22)) != 0, MMXEXT)
+ fs.setIf((c&(1<<6)) != 0, SSE4A)
+ fs.setIf(d&(1<<20) != 0, NX)
+ fs.setIf(d&(1<<27) != 0, RDTSCP)
+
+ /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
+ * used unless the OS has AVX support. */
+ if fs.inSet(AVX) {
+ fs.setIf((c&0x00000800) != 0, XOP)
+ fs.setIf((c&0x00010000) != 0, FMA4)
+ }
+
+ }
+ if maxExtendedFunction() >= 0x80000008 {
+ _, b, _, _ := cpuid(0x80000008)
+ fs.setIf((b&(1<<9)) != 0, WBNOINVD)
+ }
+
+ if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
+ eax, _, _, _ := cpuid(0x8000001b)
+ fs.setIf((eax>>0)&1 == 1, IBSFFV)
+ fs.setIf((eax>>1)&1 == 1, IBSFETCHSAM)
+ fs.setIf((eax>>2)&1 == 1, IBSOPSAM)
+ fs.setIf((eax>>3)&1 == 1, IBSRDWROPCNT)
+ fs.setIf((eax>>4)&1 == 1, IBSOPCNT)
+ fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
+ fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
+ fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
+ }
+
+ return fs
+}
+
+func valAsString(values ...uint32) []byte {
+ r := make([]byte, 4*len(values))
+ for i, v := range values {
+ dst := r[i*4:]
+ dst[0] = byte(v & 0xff)
+ dst[1] = byte((v >> 8) & 0xff)
+ dst[2] = byte((v >> 16) & 0xff)
+ dst[3] = byte((v >> 24) & 0xff)
+ switch {
+ case dst[0] == 0:
+ return r[:i*4]
+ case dst[1] == 0:
+ return r[:i*4+1]
+ case dst[2] == 0:
+ return r[:i*4+2]
+ case dst[3] == 0:
+ return r[:i*4+3]
+ }
+ }
+ return r
+}