path: root/vendor/github.com/klauspost/cpuid/v2/cpuid.go
author    Wim <wim@42.be>  2023-03-09 22:48:00 +0100
committer GitHub <noreply@github.com>  2023-03-09 22:48:00 +0100
commit    08779c29099e8940493df56d28d8aa131ac8342e (patch)
tree      7ad8ce25cf371e582137e1706dd671a6bf4342d0 /vendor/github.com/klauspost/cpuid/v2/cpuid.go
parent    d5f9cdf912d43cd2a5cb243e086fbdab9a9073b0 (diff)
Update dependencies (#2007)
* Update dependencies
Diffstat (limited to 'vendor/github.com/klauspost/cpuid/v2/cpuid.go')
-rw-r--r--  vendor/github.com/klauspost/cpuid/v2/cpuid.go  361
1 file changed, 318 insertions, 43 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
index 3d543ce9..cf2ae9c5 100644
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@@ -14,6 +14,7 @@ import (
"flag"
"fmt"
"math"
+ "math/bits"
"os"
"runtime"
"strings"
@@ -72,6 +73,7 @@ const (
AMD3DNOW // AMD 3DNOW
AMD3DNOWEXT // AMD 3DNowExt
AMXBF16 // Tile computational operations on BFLOAT16 numbers
+ AMXFP16 // Tile computational operations on FP16 numbers
AMXINT8 // Tile computational operations on 8-bit integers
AMXTILE // Tile architecture
AVX // AVX functions
@@ -92,7 +94,11 @@ const (
AVX512VNNI // AVX-512 Vector Neural Network Instructions
AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
- AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one.
+ AVXIFMA // AVX-IFMA instructions
+ AVXNECONVERT // AVX-NE-CONVERT instructions
+ AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one
+ AVXVNNI // AVX (VEX encoded) VNNI neural network instructions
+ AVXVNNIINT8 // AVX-VNNI-INT8 instructions
BMI1 // Bit Manipulation Instruction Set 1
BMI2 // Bit Manipulation Instruction Set 2
CETIBT // Intel CET Indirect Branch Tracking
@@ -101,22 +107,37 @@ const (
CLMUL // Carry-less Multiplication
CLZERO // CLZERO instruction supported
CMOV // i686 CMOV
+ CMPCCXADD // CMPCCXADD instructions
+ CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB
CMPXCHG8 // CMPXCHG8 instruction
CPBOOST // Core Performance Boost
+ CPPC // AMD: Collaborative Processor Performance Control
CX16 // CMPXCHG16B Instruction
+ EFER_LMSLE_UNS // AMD: Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
ENQCMD // Enqueue Command
ERMS // Enhanced REP MOVSB/STOSB
F16C // Half-precision floating-point conversion
+ FLUSH_L1D // Flush L1D cache
FMA3 // Intel FMA 3. Does not imply AVX.
FMA4 // Bulldozer FMA4 functions
+ FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
+ FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
+ FSRM // Fast Short Rep Mov
FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9
FXSROPT // FXSAVE/FXRSTOR optimizations
- GFNI // Galois Field New Instructions
+ GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
HLE // Hardware Lock Elision
+ HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
HTT // Hyperthreading (enabled)
HWA // Hardware assert supported. Indicates support for MSRC001_10
+ HYBRID_CPU // This part has CPUs of more than one type.
HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors
+ IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel)
+ IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR
IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+ IBRS // AMD: Indirect Branch Restricted Speculation
+ IBRS_PREFERRED // AMD: IBRS is preferred over software solution
+ IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection
IBS // Instruction Based Sampling (AMD)
IBSBRNTRGT // Instruction Based Sampling Feature (AMD)
IBSFETCHSAM // Instruction Based Sampling Feature (AMD)
@@ -126,33 +147,60 @@ const (
IBSOPSAM // Instruction Based Sampling Feature (AMD)
IBSRDWROPCNT // Instruction Based Sampling Feature (AMD)
IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD)
+ IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported
+ IBS_OPDATA4 // AMD: IBS op data 4 MSR supported
+ IBS_OPFUSE // AMD: Indicates support for IbsOpFuse
+ IBS_PREVENTHOST // Disallowing IBS use by the host is supported
+ IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4
INT_WBINVD // WBINVD/WBNOINVD are interruptible.
INVLPGB // INVLPGB and TLBSYNC instructions supported
LAHF // LAHF/SAHF in long mode
+ LAM // If set, CPU supports Linear Address Masking
+ LBRVIRT // LBR virtualization
LZCNT // LZCNT instruction
MCAOVERFLOW // MCA overflow recovery support.
+ MCDT_NO // Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.
MCOMMIT // MCOMMIT instruction supported
+ MD_CLEAR // VERW clears CPU buffers
MMX // standard MMX
MMXEXT // SSE integer functions or AMD MMX ext
MOVBE // MOVBE instruction (big-endian)
MOVDIR64B // Move 64 Bytes as Direct Store
MOVDIRI // Move Doubleword as Direct Store
+ MOVSB_ZL // Fast Zero-Length MOVSB
+ MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
MPX // Intel MPX (Memory Protection Extensions)
MSRIRC // Instruction Retired Counter MSR available
+ MSR_PAGEFLUSH // Page Flush MSR available
+ NRIPS // Indicates support for NRIP save on VMEXIT
NX // NX (No-Execute) bit
OSXSAVE // XSAVE enabled by OS
+ PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption
POPCNT // POPCNT instruction
+ PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
+ PREFETCHI // PREFETCHIT0/1 instructions
+ PSFD // AMD: Predictive Store Forward Disable
RDPRU // RDPRU instruction supported
RDRAND // RDRAND instruction is available
RDSEED // RDSEED instruction is available
RDTSCP // RDTSCP Instruction
RTM // Restricted Transactional Memory
RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort.
- SCE // SYSENTER and SYSEXIT instructions
SERIALIZE // Serialize Instruction Execution
+ SEV // AMD Secure Encrypted Virtualization supported
+ SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host
+ SEV_ALTERNATIVE // AMD SEV Alternate Injection supported
+ SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests
+ SEV_ES // AMD SEV Encrypted State supported
+ SEV_RESTRICTED // AMD SEV Restricted Injection supported
+ SEV_SNP // AMD SEV Secure Nested Paging supported
SGX // Software Guard Extensions
SGXLC // Software Guard Extensions Launch Control
SHA // Intel SHA Extensions
+ SME // AMD Secure Memory Encryption supported
+ SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced
+ SPEC_CTRL_SSBD // Speculative Store Bypass Disable
+ SRBDS_CTRL // SRBDS mitigation MSR available
SSE // SSE functions
SSE2 // P4 SSE functions
SSE3 // Prescott SSE3 functions
@@ -161,17 +209,40 @@ const (
SSE4A // AMD Barcelona microarchitecture SSE4a instructions
SSSE3 // Conroe SSSE3 functions
STIBP // Single Thread Indirect Branch Predictors
+ STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
+ STOSB_SHORT // Fast short STOSB
SUCCOR // Software uncorrectable error containment and recovery capability.
+ SVM // AMD Secure Virtual Machine
+ SVMDA // Indicates support for the SVM decode assists.
+ SVMFBASID // SVM: indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCB TLB_Control
+ SVML // AMD SVM lock. Indicates support for SVM-Lock.
+ SVMNP // AMD SVM nested paging
+ SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter
+ SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
+ SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
+ SYSEE // SYSENTER and SYSEXIT instructions
TBM // AMD Trailing Bit Manipulation
+ TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations
+ TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
+ TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
+ TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
TSXLDTRK // Intel TSX Suspend Load Address Tracking
- VAES // Vector AES
+ VAES // Vector AES. AVX(512) versions require additional checks.
+ VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits.
+ VMPL // AMD VM Permission Levels supported
+ VMSA_REGPROT // AMD VMSA Register Protection supported
VMX // Virtual Machine Extensions
- VPCLMULQDQ // Carry-Less Multiplication Quadword
+ VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
+ VTE // AMD Virtual Transparent Encryption supported
WAITPKG // TPAUSE, UMONITOR, UMWAIT
WBNOINVD // Write Back and Do Not Invalidate Cache
X87 // FPU
+ XGETBV1 // Supports XGETBV with ECX = 1
XOP // Bulldozer XOP functions
XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV
+ XSAVEC // Supports XSAVEC and the compacted form of XRSTOR.
+ XSAVEOPT // XSAVEOPT available
+ XSAVES // Supports XSAVES/XRSTORS and IA32_XSS
// ARM features:
AESARM // AES instructions
@@ -198,7 +269,6 @@ const (
SM3 // SM3 instructions
SM4 // SM4 instructions
SVE // Scalable Vector Extension
-
// Keep it last. It automatically defines the size of []flagSet
lastID
@@ -216,6 +286,7 @@ type CPUInfo struct {
LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
Family int // CPU family number
Model int // CPU model number
+ Stepping int // CPU stepping info
CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed.
BoostFreq int64 // Max clock speed, if known, 0 otherwise
@@ -318,30 +389,61 @@ func (c CPUInfo) Supports(ids ...FeatureID) bool {
// Has allows for checking a single feature.
// Should be inlined by the compiler.
-func (c CPUInfo) Has(id FeatureID) bool {
+func (c *CPUInfo) Has(id FeatureID) bool {
return c.featureSet.inSet(id)
}
+// AnyOf returns whether the CPU supports one or more of the requested features.
+func (c CPUInfo) AnyOf(ids ...FeatureID) bool {
+ for _, id := range ids {
+ if c.featureSet.inSet(id) {
+ return true
+ }
+ }
+ return false
+}
+
+// Features contains several features combined for a fast check using
+// CPUInfo.HasAll
+type Features *flagSet
+
+// CombineFeatures allows combining several features for a close-to-constant-time lookup.
+func CombineFeatures(ids ...FeatureID) Features {
+ var v flagSet
+ for _, id := range ids {
+ v.set(id)
+ }
+ return &v
+}
+
+func (c *CPUInfo) HasAll(f Features) bool {
+ return c.featureSet.hasSetP(f)
+}
+
// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
-var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2)
-var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
-var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
-var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
+var oneOfLevel = CombineFeatures(SYSEE, SYSCALL)
+var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2)
+var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3)
+var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE)
+var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL)
// X64Level returns the microarchitecture level detected on the CPU.
// If features are lacking or non x64 mode, 0 is returned.
// See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
func (c CPUInfo) X64Level() int {
- if c.featureSet.hasSet(level4Features) {
+ if !c.featureSet.hasOneOf(oneOfLevel) {
+ return 0
+ }
+ if c.featureSet.hasSetP(level4Features) {
return 4
}
- if c.featureSet.hasSet(level3Features) {
+ if c.featureSet.hasSetP(level3Features) {
return 3
}
- if c.featureSet.hasSet(level2Features) {
+ if c.featureSet.hasSetP(level2Features) {
return 2
}
- if c.featureSet.hasSet(level1Features) {
+ if c.featureSet.hasSetP(level1Features) {
return 1
}
return 0
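
Taken together, this hunk adds a three-part feature-check API: AnyOf for a one-of check, CombineFeatures plus HasAll for a precomputed all-of check, and the SYSCALL/SYSEE split that now gates X64Level. Below is a minimal usage sketch, assuming the package-level cpuid.CPU value that the library fills in at init; the method and constant names come from the diff itself.

package main

import (
	"fmt"

	"github.com/klauspost/cpuid/v2"
)

// Combine the wanted features once so each HasAll call is a handful of
// AND/compare operations instead of one lookup per feature.
var wantAVX512 = cpuid.CombineFeatures(cpuid.AVX512F, cpuid.AVX512BW, cpuid.AVX512VL)

func main() {
	cpu := cpuid.CPU // detected CPU info, populated by the package at init

	fmt.Println("x86-64 level:", cpu.X64Level()) // 0 unless SYSCALL/SYSEE and the level's feature set are present
	fmt.Println("any VNNI:", cpu.AnyOf(cpuid.AVX512VNNI, cpuid.AVXVNNI))
	fmt.Println("AVX-512 subset:", cpu.HasAll(wantAVX512))
}
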
@@ -369,8 +471,9 @@ func (c CPUInfo) IsVendor(v Vendor) bool {
return c.VendorID == v
}
+// FeatureSet returns all available features as strings.
func (c CPUInfo) FeatureSet() []string {
- s := make([]string, 0)
+ s := make([]string, 0, c.featureSet.nEnabled())
s = append(s, c.featureSet.Strings()...)
return s
}
@@ -504,7 +607,7 @@ const flagMask = flagBits - 1
// flagSet contains detected cpu features and characteristics in an array of flags
type flagSet [(lastID + flagMask) / flagBits]flags
-func (s flagSet) inSet(feat FeatureID) bool {
+func (s *flagSet) inSet(feat FeatureID) bool {
return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
}
@@ -534,7 +637,7 @@ func (s *flagSet) or(other flagSet) {
}
// hasSet returns whether all features are present.
-func (s flagSet) hasSet(other flagSet) bool {
+func (s *flagSet) hasSet(other flagSet) bool {
for i, v := range other[:] {
if s[i]&v != v {
return false
@@ -543,6 +646,34 @@ func (s flagSet) hasSet(other flagSet) bool {
return true
}
+// hasSetP returns whether all features are present.
+func (s *flagSet) hasSetP(other *flagSet) bool {
+ for i, v := range other[:] {
+ if s[i]&v != v {
+ return false
+ }
+ }
+ return true
+}
+
+// hasOneOf returns whether one or more features are present.
+func (s *flagSet) hasOneOf(other *flagSet) bool {
+ for i, v := range other[:] {
+ if s[i]&v != 0 {
+ return true
+ }
+ }
+ return false
+}
+
+// nEnabled will return the number of enabled flags.
+func (s *flagSet) nEnabled() (n int) {
+ for _, v := range s[:] {
+ n += bits.OnesCount64(uint64(v))
+ }
+ return n
+}
+
func flagSetWith(feat ...FeatureID) flagSet {
var res flagSet
for _, f := range feat {
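
The inSet/hasSetP/nEnabled helpers above are plain bit-set arithmetic: a FeatureID selects a word via feat>>flagBitsLog2 and a bit via feat&flagMask. A self-contained sketch of that layout follows, assuming 64-bit flag words (flagBits = 64, flagBitsLog2 = 6; the real constants and lastID are defined elsewhere in the file, so the values here are illustrative).

package main

import (
	"fmt"
	"math/bits"
)

// Stripped-down mirror of the vendored flagSet, assuming 64-bit flag words.
// lastID is a made-up count of feature IDs for the sake of the example.
const (
	flagBits     = 64
	flagBitsLog2 = 6
	flagMask     = flagBits - 1
	lastID       = 300
)

type flagSet [(lastID + flagMask) / flagBits]uint64

func (s *flagSet) set(feat int) {
	s[feat>>flagBitsLog2] |= 1 << (feat & flagMask)
}

func (s *flagSet) inSet(feat int) bool {
	return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0
}

// nEnabled counts the set bits across all words, as the vendored code does.
func (s *flagSet) nEnabled() (n int) {
	for _, v := range s[:] {
		n += bits.OnesCount64(v)
	}
	return n
}

func main() {
	var s flagSet
	s.set(5)   // word 0, bit 5
	s.set(130) // word 130>>6 = 2, bit 130&63 = 2
	fmt.Println(s.inSet(5), s.inSet(6), s.nEnabled()) // true false 2
}
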
@@ -631,7 +762,7 @@ func threadsPerCore() int {
if vend == AMD {
// Workaround for AMD returning 0, assume 2 if >= Zen 2
// It will be more correct than not.
- fam, _ := familyModel()
+ fam, _, _ := familyModel()
_, _, _, d := cpuid(1)
if (d&(1<<28)) != 0 && fam >= 23 {
return 2
@@ -669,14 +800,27 @@ func logicalCores() int {
}
}
-func familyModel() (int, int) {
+func familyModel() (family, model, stepping int) {
if maxFunctionID() < 0x1 {
- return 0, 0
+ return 0, 0, 0
}
eax, _, _, _ := cpuid(1)
- family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
- model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
- return int(family), int(model)
+ // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0].
+ family = int((eax >> 8) & 0xf)
+ extFam := family == 0x6 // Intel is 0x6, needs extended model.
+ if family == 0xf {
+ // Add ExtFamily
+ family += int((eax >> 20) & 0xff)
+ extFam = true
+ }
+ // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0].
+ model = int((eax >> 4) & 0xf)
+ if extFam {
+ // Add ExtModel
+ model += int((eax >> 12) & 0xf0)
+ }
+ stepping = int(eax & 0xf)
+ return family, model, stepping
}
func physicalCores() int {
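
The new familyModel above folds the extended family/model fields into the base values per the Intel/AMD CPUID conventions and now also returns the stepping. A worked sketch of that decoding on a raw leaf-1 EAX value follows; the helper name and the sample EAX are illustrative, not part of the library.

package main

import "fmt"

// decodeFamilyModel mirrors the familyModel logic above, but takes the raw
// CPUID leaf-1 EAX value as a parameter so the bit arithmetic can be
// exercised without running CPUID.
func decodeFamilyModel(eax uint32) (family, model, stepping int) {
	family = int((eax >> 8) & 0xf)
	extFam := family == 0x6 // Intel family 6 also uses the extended model field
	if family == 0xf {
		family += int((eax >> 20) & 0xff) // add ExtendedFamily
		extFam = true
	}
	model = int((eax >> 4) & 0xf)
	if extFam {
		model += int((eax >> 12) & 0xf0) // ExtendedModel, already shifted into bits 7:4
	}
	stepping = int(eax & 0xf)
	return family, model, stepping
}

func main() {
	// Illustrative Zen 3-style EAX: base family 0xF + ext family 0xA = 25,
	// base model 0x1 + ext model 0x20 = 33, stepping 0.
	fmt.Println(decodeFamilyModel(0x00A20F10)) // 25 33 0
}
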
@@ -811,9 +955,14 @@ func (c *CPUInfo) cacheSize() {
c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
- if maxExtendedFunction() < 0x8000001D {
+ if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) {
return
}
+
+ // The Xen hypervisor is buggy and returns the same entry no matter the ECX value.
+ // Hack: when we encounter the same entry 100 times, we break.
+ nSame := 0
+ var last uint32
for i := uint32(0); i < math.MaxUint32; i++ {
eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
@@ -829,6 +978,16 @@ func (c *CPUInfo) cacheSize() {
return
}
+ // Check for the same value repeated.
+ comb := eax ^ ebx ^ ecx
+ if comb == last {
+ nSame++
+ if nSame == 100 {
+ return
+ }
+ }
+ last = comb
+
switch level {
case 1:
switch typ {
@@ -913,14 +1072,13 @@ func support() flagSet {
if mfi < 0x1 {
return fs
}
- family, model := familyModel()
+ family, model, _ := familyModel()
_, _, c, d := cpuid(1)
fs.setIf((d&(1<<0)) != 0, X87)
fs.setIf((d&(1<<8)) != 0, CMPXCHG8)
- fs.setIf((d&(1<<11)) != 0, SCE)
+ fs.setIf((d&(1<<11)) != 0, SYSEE)
fs.setIf((d&(1<<15)) != 0, CMOV)
- fs.setIf((d&(1<<22)) != 0, MMXEXT)
fs.setIf((d&(1<<23)) != 0, MMX)
fs.setIf((d&(1<<24)) != 0, FXSR)
fs.setIf((d&(1<<25)) != 0, FXSROPT)
@@ -928,9 +1086,9 @@ func support() flagSet {
fs.setIf((d&(1<<26)) != 0, SSE2)
fs.setIf((c&1) != 0, SSE3)
fs.setIf((c&(1<<5)) != 0, VMX)
- fs.setIf((c&0x00000200) != 0, SSSE3)
- fs.setIf((c&0x00080000) != 0, SSE4)
- fs.setIf((c&0x00100000) != 0, SSE42)
+ fs.setIf((c&(1<<9)) != 0, SSSE3)
+ fs.setIf((c&(1<<19)) != 0, SSE4)
+ fs.setIf((c&(1<<20)) != 0, SSE42)
fs.setIf((c&(1<<25)) != 0, AESNI)
fs.setIf((c&(1<<1)) != 0, CLMUL)
fs.setIf(c&(1<<22) != 0, MOVBE)
@@ -976,7 +1134,6 @@ func support() flagSet {
// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
if mfi >= 7 {
_, ebx, ecx, edx := cpuidex(7, 0)
- eax1, _, _, _ := cpuidex(7, 1)
if fs.inSet(AVX) && (ebx&0x00000020) != 0 {
fs.set(AVX2)
}
@@ -993,21 +1150,52 @@ func support() flagSet {
fs.setIf(ebx&(1<<18) != 0, RDSEED)
fs.setIf(ebx&(1<<19) != 0, ADX)
fs.setIf(ebx&(1<<29) != 0, SHA)
+
// CPUID.(EAX=7, ECX=0).ECX
fs.setIf(ecx&(1<<5) != 0, WAITPKG)
fs.setIf(ecx&(1<<7) != 0, CETSS)
+ fs.setIf(ecx&(1<<8) != 0, GFNI)
+ fs.setIf(ecx&(1<<9) != 0, VAES)
+ fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
+ fs.setIf(ecx&(1<<13) != 0, TME)
fs.setIf(ecx&(1<<25) != 0, CLDEMOTE)
fs.setIf(ecx&(1<<27) != 0, MOVDIRI)
fs.setIf(ecx&(1<<28) != 0, MOVDIR64B)
fs.setIf(ecx&(1<<29) != 0, ENQCMD)
fs.setIf(ecx&(1<<30) != 0, SGXLC)
+
// CPUID.(EAX=7, ECX=0).EDX
+ fs.setIf(edx&(1<<4) != 0, FSRM)
+ fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL)
+ fs.setIf(edx&(1<<10) != 0, MD_CLEAR)
fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT)
fs.setIf(edx&(1<<14) != 0, SERIALIZE)
+ fs.setIf(edx&(1<<15) != 0, HYBRID_CPU)
fs.setIf(edx&(1<<16) != 0, TSXLDTRK)
+ fs.setIf(edx&(1<<18) != 0, PCONFIG)
fs.setIf(edx&(1<<20) != 0, CETIBT)
fs.setIf(edx&(1<<26) != 0, IBPB)
fs.setIf(edx&(1<<27) != 0, STIBP)
+ fs.setIf(edx&(1<<28) != 0, FLUSH_L1D)
+ fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP)
+ fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP)
+ fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD)
+
+ // CPUID.(EAX=7, ECX=1).EDX
+ fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8)
+ fs.setIf(edx&(1<<5) != 0, AVXNECONVERT)
+ fs.setIf(edx&(1<<14) != 0, PREFETCHI)
+
+ // CPUID.(EAX=7, ECX=1).EAX
+ eax1, _, _, _ := cpuidex(7, 1)
+ fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI)
+ fs.setIf(eax1&(1<<7) != 0, CMPCCXADD)
+ fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL)
+ fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT)
+ fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT)
+ fs.setIf(eax1&(1<<22) != 0, HRESET)
+ fs.setIf(eax1&(1<<23) != 0, AVXIFMA)
+ fs.setIf(eax1&(1<<26) != 0, LAM)
// Only detect AVX-512 features if XGETBV is supported
if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
@@ -1033,9 +1221,6 @@ func support() flagSet {
// ecx
fs.setIf(ecx&(1<<1) != 0, AVX512VBMI)
fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2)
- fs.setIf(ecx&(1<<8) != 0, GFNI)
- fs.setIf(ecx&(1<<9) != 0, VAES)
- fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ)
fs.setIf(ecx&(1<<11) != 0, AVX512VNNI)
fs.setIf(ecx&(1<<12) != 0, AVX512BITALG)
fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ)
@@ -1047,31 +1232,66 @@ func support() flagSet {
fs.setIf(edx&(1<<25) != 0, AMXINT8)
// eax1 = CPUID.(EAX=7, ECX=1).EAX
fs.setIf(eax1&(1<<5) != 0, AVX512BF16)
+ fs.setIf(eax1&(1<<21) != 0, AMXFP16)
}
}
+
+ // CPUID.(EAX=7, ECX=2)
+ _, _, _, edx = cpuidex(7, 2)
+ fs.setIf(edx&(1<<5) != 0, MCDT_NO)
}
+ // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1)
+ // EAX
+ // Bit 00: XSAVEOPT is available.
+ // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set.
+ // Bit 02: Supports XGETBV with ECX = 1 if set.
+ // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set.
+ // Bits 31 - 04: Reserved.
+ // EBX
+ // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCR0 | IA32_XSS.
+ // ECX
+ // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1.
+ // EDX?
+ // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved.
+ if mfi >= 0xd {
+ if fs.inSet(XSAVE) {
+ eax, _, _, _ := cpuidex(0xd, 1)
+ fs.setIf(eax&(1<<0) != 0, XSAVEOPT)
+ fs.setIf(eax&(1<<1) != 0, XSAVEC)
+ fs.setIf(eax&(1<<2) != 0, XGETBV1)
+ fs.setIf(eax&(1<<3) != 0, XSAVES)
+ }
+ }
if maxExtendedFunction() >= 0x80000001 {
_, _, c, d := cpuid(0x80000001)
if (c & (1 << 5)) != 0 {
fs.set(LZCNT)
fs.set(POPCNT)
}
+ // ECX
fs.setIf((c&(1<<0)) != 0, LAHF)
- fs.setIf((c&(1<<10)) != 0, IBS)
- fs.setIf((d&(1<<31)) != 0, AMD3DNOW)
- fs.setIf((d&(1<<30)) != 0, AMD3DNOWEXT)
- fs.setIf((d&(1<<23)) != 0, MMX)
- fs.setIf((d&(1<<22)) != 0, MMXEXT)
+ fs.setIf((c&(1<<2)) != 0, SVM)
fs.setIf((c&(1<<6)) != 0, SSE4A)
+ fs.setIf((c&(1<<10)) != 0, IBS)
+ fs.setIf((c&(1<<22)) != 0, TOPEXT)
+
+ // EDX
+ fs.setIf(d&(1<<11) != 0, SYSCALL)
fs.setIf(d&(1<<20) != 0, NX)
+ fs.setIf(d&(1<<22) != 0, MMXEXT)
+ fs.setIf(d&(1<<23) != 0, MMX)
+ fs.setIf(d&(1<<24) != 0, FXSR)
+ fs.setIf(d&(1<<25) != 0, FXSROPT)
fs.setIf(d&(1<<27) != 0, RDTSCP)
+ fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT)
+ fs.setIf(d&(1<<31) != 0, AMD3DNOW)
/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
* used unless the OS has AVX support. */
if fs.inSet(AVX) {
- fs.setIf((c&0x00000800) != 0, XOP)
- fs.setIf((c&0x00010000) != 0, FMA4)
+ fs.setIf((c&(1<<11)) != 0, XOP)
+ fs.setIf((c&(1<<16)) != 0, FMA4)
}
}
@@ -1085,15 +1305,48 @@ func support() flagSet {
if maxExtendedFunction() >= 0x80000008 {
_, b, _, _ := cpuid(0x80000008)
+ fs.setIf(b&(1<<28) != 0, PSFD)
+ fs.setIf(b&(1<<27) != 0, CPPC)
+ fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD)
+ fs.setIf(b&(1<<23) != 0, PPIN)
+ fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED)
+ fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS)
+ fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP)
+ fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED)
+ fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON)
+ fs.setIf(b&(1<<15) != 0, STIBP)
+ fs.setIf(b&(1<<14) != 0, IBRS)
+ fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
+ fs.setIf(b&(1<<12) != 0, IBPB)
fs.setIf((b&(1<<9)) != 0, WBNOINVD)
fs.setIf((b&(1<<8)) != 0, MCOMMIT)
- fs.setIf((b&(1<<13)) != 0, INT_WBINVD)
fs.setIf((b&(1<<4)) != 0, RDPRU)
fs.setIf((b&(1<<3)) != 0, INVLPGB)
fs.setIf((b&(1<<1)) != 0, MSRIRC)
fs.setIf((b&(1<<0)) != 0, CLZERO)
}
+ if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A {
+ _, _, _, edx := cpuid(0x8000000A)
+ fs.setIf((edx>>0)&1 == 1, SVMNP)
+ fs.setIf((edx>>1)&1 == 1, LBRVIRT)
+ fs.setIf((edx>>2)&1 == 1, SVML)
+ fs.setIf((edx>>3)&1 == 1, NRIPS)
+ fs.setIf((edx>>4)&1 == 1, TSCRATEMSR)
+ fs.setIf((edx>>5)&1 == 1, VMCBCLEAN)
+ fs.setIf((edx>>6)&1 == 1, SVMFBASID)
+ fs.setIf((edx>>7)&1 == 1, SVMDA)
+ fs.setIf((edx>>10)&1 == 1, SVMPF)
+ fs.setIf((edx>>12)&1 == 1, SVMPFT)
+ }
+
+ if maxExtendedFunction() >= 0x8000001a {
+ eax, _, _, _ := cpuid(0x8000001a)
+ fs.setIf((eax>>0)&1 == 1, FP128)
+ fs.setIf((eax>>1)&1 == 1, MOVU)
+ fs.setIf((eax>>2)&1 == 1, FP256)
+ }
+
if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) {
eax, _, _, _ := cpuid(0x8000001b)
fs.setIf((eax>>0)&1 == 1, IBSFFV)
@@ -1104,6 +1357,28 @@ func support() flagSet {
fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT)
fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT)
fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK)
+ fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE)
+ fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX)
+ fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1.
+ fs.setIf((eax>>11)&1 == 1, IBS_ZEN4)
+ }
+
+ if maxExtendedFunction() >= 0x8000001f && vend == AMD {
+ a, _, _, _ := cpuid(0x8000001f)
+ fs.setIf((a>>0)&1 == 1, SME)
+ fs.setIf((a>>1)&1 == 1, SEV)
+ fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH)
+ fs.setIf((a>>3)&1 == 1, SEV_ES)
+ fs.setIf((a>>4)&1 == 1, SEV_SNP)
+ fs.setIf((a>>5)&1 == 1, VMPL)
+ fs.setIf((a>>10)&1 == 1, SME_COHERENT)
+ fs.setIf((a>>11)&1 == 1, SEV_64BIT)
+ fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED)
+ fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE)
+ fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP)
+ fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST)
+ fs.setIf((a>>16)&1 == 1, VTE)
+ fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
}
return fs
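
All of the newly detected leaves (SVM, SEV, XSAVE sub-features, stepping, and so on) land in the same flagSet, so callers query them like any other flag. A small sketch, again assuming the package-level cpuid.CPU value populated at init:

package main

import (
	"fmt"

	"github.com/klauspost/cpuid/v2"
)

func main() {
	cpu := cpuid.CPU // populated by the package at init

	// The newly detected leaves become ordinary FeatureIDs, queried the same
	// way as the long-standing flags.
	fmt.Println("stepping:", cpu.Stepping)
	fmt.Println("SEV-SNP:", cpu.Has(cpuid.SEV_SNP))
	fmt.Println("XSAVEC and XSAVES:", cpu.Supports(cpuid.XSAVEC, cpuid.XSAVES))
	fmt.Println("all features:", cpu.FeatureSet())
}
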