diff options
author | Wim <wim@42.be> | 2023-03-09 22:48:00 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-09 22:48:00 +0100 |
commit | 08779c29099e8940493df56d28d8aa131ac8342e (patch) | |
tree | 7ad8ce25cf371e582137e1706dd671a6bf4342d0 /vendor/github.com/klauspost/cpuid/v2/cpuid.go | |
parent | d5f9cdf912d43cd2a5cb243e086fbdab9a9073b0 (diff) | |
download | matterbridge-msglm-08779c29099e8940493df56d28d8aa131ac8342e.tar.gz matterbridge-msglm-08779c29099e8940493df56d28d8aa131ac8342e.tar.bz2 matterbridge-msglm-08779c29099e8940493df56d28d8aa131ac8342e.zip |
Update dependencies (#2007)
* Update dependencies
Diffstat (limited to 'vendor/github.com/klauspost/cpuid/v2/cpuid.go')
-rw-r--r-- | vendor/github.com/klauspost/cpuid/v2/cpuid.go | 361 |
1 files changed, 318 insertions, 43 deletions
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go index 3d543ce9..cf2ae9c5 100644 --- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go @@ -14,6 +14,7 @@ import ( "flag" "fmt" "math" + "math/bits" "os" "runtime" "strings" @@ -72,6 +73,7 @@ const ( AMD3DNOW // AMD 3DNOW AMD3DNOWEXT // AMD 3DNowExt AMXBF16 // Tile computational operations on BFLOAT16 numbers + AMXFP16 // Tile computational operations on FP16 numbers AMXINT8 // Tile computational operations on 8-bit integers AMXTILE // Tile architecture AVX // AVX functions @@ -92,7 +94,11 @@ const ( AVX512VNNI // AVX-512 Vector Neural Network Instructions AVX512VP2INTERSECT // AVX-512 Intersect for D/Q AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword - AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one. + AVXIFMA // AVX-IFMA instructions + AVXNECONVERT // AVX-NE-CONVERT instructions + AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one + AVXVNNI // AVX (VEX encoded) VNNI neural network instructions + AVXVNNIINT8 // AVX-VNNI-INT8 instructions BMI1 // Bit Manipulation Instruction Set 1 BMI2 // Bit Manipulation Instruction Set 2 CETIBT // Intel CET Indirect Branch Tracking @@ -101,22 +107,37 @@ const ( CLMUL // Carry-less Multiplication CLZERO // CLZERO instruction supported CMOV // i686 CMOV + CMPCCXADD // CMPCCXADD instructions + CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB CMPXCHG8 // CMPXCHG8 instruction CPBOOST // Core Performance Boost + CPPC // AMD: Collaborative Processor Performance Control CX16 // CMPXCHG16B Instruction + EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ ENQCMD // Enqueue Command ERMS // Enhanced REP MOVSB/STOSB F16C // Half-precision floating-point conversion + FLUSH_L1D // Flush L1D cache FMA3 // Intel FMA 3. Does not imply AVX. FMA4 // Bulldozer FMA4 functions + FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide + FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide + FSRM // Fast Short Rep Mov FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9 FXSROPT // FXSAVE/FXRSTOR optimizations - GFNI // Galois Field New Instructions + GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. HLE // Hardware Lock Elision + HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR HTT // Hyperthreading (enabled) HWA // Hardware assert supported. Indicates support for MSRC001_10 + HYBRID_CPU // This part has CPUs of more than one type. HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors + IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel) + IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) + IBRS // AMD: Indirect Branch Restricted Speculation + IBRS_PREFERRED // AMD: IBRS is preferred over software solution + IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection IBS // Instruction Based Sampling (AMD) IBSBRNTRGT // Instruction Based Sampling Feature (AMD) IBSFETCHSAM // Instruction Based Sampling Feature (AMD) @@ -126,33 +147,60 @@ const ( IBSOPSAM // Instruction Based Sampling Feature (AMD) IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) + IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported + IBS_OPDATA4 // AMD: IBS op data 4 MSR supported + IBS_OPFUSE // AMD: Indicates support for IbsOpFuse + IBS_PREVENTHOST // Disallowing IBS use by the host supported + IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4 INT_WBINVD // WBINVD/WBNOINVD are interruptible. INVLPGB // NVLPGB and TLBSYNC instruction supported LAHF // LAHF/SAHF in long mode + LAM // If set, CPU supports Linear Address Masking + LBRVIRT // LBR virtualization LZCNT // LZCNT instruction MCAOVERFLOW // MCA overflow recovery support. + MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. MCOMMIT // MCOMMIT instruction supported + MD_CLEAR // VERW clears CPU buffers MMX // standard MMX MMXEXT // SSE integer functions or AMD MMX ext MOVBE // MOVBE instruction (big-endian) MOVDIR64B // Move 64 Bytes as Direct Store MOVDIRI // Move Doubleword as Direct Store + MOVSB_ZL // Fast Zero-Length MOVSB + MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD MPX // Intel MPX (Memory Protection Extensions) MSRIRC // Instruction Retired Counter MSR available + MSR_PAGEFLUSH // Page Flush MSR available + NRIPS // Indicates support for NRIP save on VMEXIT NX // NX (No-Execute) bit OSXSAVE // XSAVE enabled by OS + PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption POPCNT // POPCNT instruction + PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled + PREFETCHI // PREFETCHIT0/1 instructions + PSFD // AMD: Predictive Store Forward Disable RDPRU // RDPRU instruction supported RDRAND // RDRAND instruction is available RDSEED // RDSEED instruction is available RDTSCP // RDTSCP Instruction RTM // Restricted Transactional Memory RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. - SCE // SYSENTER and SYSEXIT instructions SERIALIZE // Serialize Instruction Execution + SEV // AMD Secure Encrypted Virtualization supported + SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host + SEV_ALTERNATIVE // AMD SEV Alternate Injection supported + SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests + SEV_ES // AMD SEV Encrypted State supported + SEV_RESTRICTED // AMD SEV Restricted Injection supported + SEV_SNP // AMD SEV Secure Nested Paging supported SGX // Software Guard Extensions SGXLC // Software Guard Extensions Launch Control SHA // Intel SHA Extensions + SME // AMD Secure Memory Encryption supported + SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced + SPEC_CTRL_SSBD // Speculative Store Bypass Disable + SRBDS_CTRL // SRBDS mitigation MSR available SSE // SSE functions SSE2 // P4 SSE functions SSE3 // Prescott SSE3 functions @@ -161,17 +209,40 @@ const ( SSE4A // AMD Barcelona microarchitecture SSE4a instructions SSSE3 // Conroe SSSE3 functions STIBP // Single Thread Indirect Branch Predictors + STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On + STOSB_SHORT // Fast short STOSB SUCCOR // Software uncorrectable error containment and recovery capability. + SVM // AMD Secure Virtual Machine + SVMDA // Indicates support for the SVM decode assists. + SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control + SVML // AMD SVM lock. Indicates support for SVM-Lock. + SVMNP // AMD SVM nested paging + SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter + SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold + SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. + SYSEE // SYSENTER and SYSEXIT instructions TBM // AMD Trailing Bit Manipulation + TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations + TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. + TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. + TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 TSXLDTRK // Intel TSX Suspend Load Address Tracking - VAES // Vector AES + VAES // Vector AES. AVX(512) versions requires additional checks. + VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits. + VMPL // AMD VM Permission Levels supported + VMSA_REGPROT // AMD VMSA Register Protection supported VMX // Virtual Machine Extensions - VPCLMULQDQ // Carry-Less Multiplication Quadword + VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. + VTE // AMD Virtual Transparent Encryption supported WAITPKG // TPAUSE, UMONITOR, UMWAIT WBNOINVD // Write Back and Do Not Invalidate Cache X87 // FPU + XGETBV1 // Supports XGETBV with ECX = 1 XOP // Bulldozer XOP functions XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV + XSAVEC // Supports XSAVEC and the compacted form of XRSTOR. + XSAVEOPT // XSAVEOPT available + XSAVES // Supports XSAVES/XRSTORS and IA32_XSS // ARM features: AESARM // AES instructions @@ -198,7 +269,6 @@ const ( SM3 // SM3 instructions SM4 // SM4 instructions SVE // Scalable Vector Extension - // Keep it last. It automatically defines the size of []flagSet lastID @@ -216,6 +286,7 @@ type CPUInfo struct { LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. Family int // CPU family number Model int // CPU model number + Stepping int // CPU stepping info CacheLine int // Cache line size in bytes. Will be 0 if undetectable. Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed. BoostFreq int64 // Max clock speed, if known, 0 otherwise @@ -318,30 +389,61 @@ func (c CPUInfo) Supports(ids ...FeatureID) bool { // Has allows for checking a single feature. // Should be inlined by the compiler. -func (c CPUInfo) Has(id FeatureID) bool { +func (c *CPUInfo) Has(id FeatureID) bool { return c.featureSet.inSet(id) } +// AnyOf returns whether the CPU supports one or more of the requested features. +func (c CPUInfo) AnyOf(ids ...FeatureID) bool { + for _, id := range ids { + if c.featureSet.inSet(id) { + return true + } + } + return false +} + +// Features contains several features combined for a fast check using +// CpuInfo.HasAll +type Features *flagSet + +// CombineFeatures allows to combine several features for a close to constant time lookup. +func CombineFeatures(ids ...FeatureID) Features { + var v flagSet + for _, id := range ids { + v.set(id) + } + return &v +} + +func (c *CPUInfo) HasAll(f Features) bool { + return c.featureSet.hasSetP(f) +} + // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels -var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2) -var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) -var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) -var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) +var oneOfLevel = CombineFeatures(SYSEE, SYSCALL) +var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2) +var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) +var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) +var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) // X64Level returns the microarchitecture level detected on the CPU. // If features are lacking or non x64 mode, 0 is returned. // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels func (c CPUInfo) X64Level() int { - if c.featureSet.hasSet(level4Features) { + if !c.featureSet.hasOneOf(oneOfLevel) { + return 0 + } + if c.featureSet.hasSetP(level4Features) { return 4 } - if c.featureSet.hasSet(level3Features) { + if c.featureSet.hasSetP(level3Features) { return 3 } - if c.featureSet.hasSet(level2Features) { + if c.featureSet.hasSetP(level2Features) { return 2 } - if c.featureSet.hasSet(level1Features) { + if c.featureSet.hasSetP(level1Features) { return 1 } return 0 @@ -369,8 +471,9 @@ func (c CPUInfo) IsVendor(v Vendor) bool { return c.VendorID == v } +// FeatureSet returns all available features as strings. func (c CPUInfo) FeatureSet() []string { - s := make([]string, 0) + s := make([]string, 0, c.featureSet.nEnabled()) s = append(s, c.featureSet.Strings()...) return s } @@ -504,7 +607,7 @@ const flagMask = flagBits - 1 // flagSet contains detected cpu features and characteristics in an array of flags type flagSet [(lastID + flagMask) / flagBits]flags -func (s flagSet) inSet(feat FeatureID) bool { +func (s *flagSet) inSet(feat FeatureID) bool { return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 } @@ -534,7 +637,7 @@ func (s *flagSet) or(other flagSet) { } // hasSet returns whether all features are present. -func (s flagSet) hasSet(other flagSet) bool { +func (s *flagSet) hasSet(other flagSet) bool { for i, v := range other[:] { if s[i]&v != v { return false @@ -543,6 +646,34 @@ func (s flagSet) hasSet(other flagSet) bool { return true } +// hasSet returns whether all features are present. +func (s *flagSet) hasSetP(other *flagSet) bool { + for i, v := range other[:] { + if s[i]&v != v { + return false + } + } + return true +} + +// hasOneOf returns whether one or more features are present. +func (s *flagSet) hasOneOf(other *flagSet) bool { + for i, v := range other[:] { + if s[i]&v != 0 { + return true + } + } + return false +} + +// nEnabled will return the number of enabled flags. +func (s *flagSet) nEnabled() (n int) { + for _, v := range s[:] { + n += bits.OnesCount64(uint64(v)) + } + return n +} + func flagSetWith(feat ...FeatureID) flagSet { var res flagSet for _, f := range feat { @@ -631,7 +762,7 @@ func threadsPerCore() int { if vend == AMD { // Workaround for AMD returning 0, assume 2 if >= Zen 2 // It will be more correct than not. - fam, _ := familyModel() + fam, _, _ := familyModel() _, _, _, d := cpuid(1) if (d&(1<<28)) != 0 && fam >= 23 { return 2 @@ -669,14 +800,27 @@ func logicalCores() int { } } -func familyModel() (int, int) { +func familyModel() (family, model, stepping int) { if maxFunctionID() < 0x1 { - return 0, 0 + return 0, 0, 0 } eax, _, _, _ := cpuid(1) - family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) - model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) - return int(family), int(model) + // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0]. + family = int((eax >> 8) & 0xf) + extFam := family == 0x6 // Intel is 0x6, needs extended model. + if family == 0xf { + // Add ExtFamily + family += int((eax >> 20) & 0xff) + extFam = true + } + // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0]. + model = int((eax >> 4) & 0xf) + if extFam { + // Add ExtModel + model += int((eax >> 12) & 0xf0) + } + stepping = int(eax & 0xf) + return family, model, stepping } func physicalCores() int { @@ -811,9 +955,14 @@ func (c *CPUInfo) cacheSize() { c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties - if maxExtendedFunction() < 0x8000001D { + if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) { return } + + // Xen Hypervisor is buggy and returns the same entry no matter ECX value. + // Hack: When we encounter the same entry 100 times we break. + nSame := 0 + var last uint32 for i := uint32(0); i < math.MaxUint32; i++ { eax, ebx, ecx, _ := cpuidex(0x8000001D, i) @@ -829,6 +978,16 @@ func (c *CPUInfo) cacheSize() { return } + // Check for the same value repeated. + comb := eax ^ ebx ^ ecx + if comb == last { + nSame++ + if nSame == 100 { + return + } + } + last = comb + switch level { case 1: switch typ { @@ -913,14 +1072,13 @@ func support() flagSet { if mfi < 0x1 { return fs } - family, model := familyModel() + family, model, _ := familyModel() _, _, c, d := cpuid(1) fs.setIf((d&(1<<0)) != 0, X87) fs.setIf((d&(1<<8)) != 0, CMPXCHG8) - fs.setIf((d&(1<<11)) != 0, SCE) + fs.setIf((d&(1<<11)) != 0, SYSEE) fs.setIf((d&(1<<15)) != 0, CMOV) - fs.setIf((d&(1<<22)) != 0, MMXEXT) fs.setIf((d&(1<<23)) != 0, MMX) fs.setIf((d&(1<<24)) != 0, FXSR) fs.setIf((d&(1<<25)) != 0, FXSROPT) @@ -928,9 +1086,9 @@ func support() flagSet { fs.setIf((d&(1<<26)) != 0, SSE2) fs.setIf((c&1) != 0, SSE3) fs.setIf((c&(1<<5)) != 0, VMX) - fs.setIf((c&0x00000200) != 0, SSSE3) - fs.setIf((c&0x00080000) != 0, SSE4) - fs.setIf((c&0x00100000) != 0, SSE42) + fs.setIf((c&(1<<9)) != 0, SSSE3) + fs.setIf((c&(1<<19)) != 0, SSE4) + fs.setIf((c&(1<<20)) != 0, SSE42) fs.setIf((c&(1<<25)) != 0, AESNI) fs.setIf((c&(1<<1)) != 0, CLMUL) fs.setIf(c&(1<<22) != 0, MOVBE) @@ -976,7 +1134,6 @@ func support() flagSet { // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. if mfi >= 7 { _, ebx, ecx, edx := cpuidex(7, 0) - eax1, _, _, _ := cpuidex(7, 1) if fs.inSet(AVX) && (ebx&0x00000020) != 0 { fs.set(AVX2) } @@ -993,21 +1150,52 @@ func support() flagSet { fs.setIf(ebx&(1<<18) != 0, RDSEED) fs.setIf(ebx&(1<<19) != 0, ADX) fs.setIf(ebx&(1<<29) != 0, SHA) + // CPUID.(EAX=7, ECX=0).ECX fs.setIf(ecx&(1<<5) != 0, WAITPKG) fs.setIf(ecx&(1<<7) != 0, CETSS) + fs.setIf(ecx&(1<<8) != 0, GFNI) + fs.setIf(ecx&(1<<9) != 0, VAES) + fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) + fs.setIf(ecx&(1<<13) != 0, TME) fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) fs.setIf(ecx&(1<<27) != 0, MOVDIRI) fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) fs.setIf(ecx&(1<<29) != 0, ENQCMD) fs.setIf(ecx&(1<<30) != 0, SGXLC) + // CPUID.(EAX=7, ECX=0).EDX + fs.setIf(edx&(1<<4) != 0, FSRM) + fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL) + fs.setIf(edx&(1<<10) != 0, MD_CLEAR) fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) fs.setIf(edx&(1<<14) != 0, SERIALIZE) + fs.setIf(edx&(1<<15) != 0, HYBRID_CPU) fs.setIf(edx&(1<<16) != 0, TSXLDTRK) + fs.setIf(edx&(1<<18) != 0, PCONFIG) fs.setIf(edx&(1<<20) != 0, CETIBT) fs.setIf(edx&(1<<26) != 0, IBPB) fs.setIf(edx&(1<<27) != 0, STIBP) + fs.setIf(edx&(1<<28) != 0, FLUSH_L1D) + fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP) + fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP) + fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD) + + // CPUID.(EAX=7, ECX=1).EDX + fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8) + fs.setIf(edx&(1<<5) != 0, AVXNECONVERT) + fs.setIf(edx&(1<<14) != 0, PREFETCHI) + + // CPUID.(EAX=7, ECX=1).EAX + eax1, _, _, _ := cpuidex(7, 1) + fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) + fs.setIf(eax1&(1<<7) != 0, CMPCCXADD) + fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) + fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) + fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) + fs.setIf(eax1&(1<<22) != 0, HRESET) + fs.setIf(eax1&(1<<23) != 0, AVXIFMA) + fs.setIf(eax1&(1<<26) != 0, LAM) // Only detect AVX-512 features if XGETBV is supported if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { @@ -1033,9 +1221,6 @@ func support() flagSet { // ecx fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) - fs.setIf(ecx&(1<<8) != 0, GFNI) - fs.setIf(ecx&(1<<9) != 0, VAES) - fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) @@ -1047,31 +1232,66 @@ func support() flagSet { fs.setIf(edx&(1<<25) != 0, AMXINT8) // eax1 = CPUID.(EAX=7, ECX=1).EAX fs.setIf(eax1&(1<<5) != 0, AVX512BF16) + fs.setIf(eax1&(1<<21) != 0, AMXFP16) } } + + // CPUID.(EAX=7, ECX=2) + _, _, _, edx = cpuidex(7, 2) + fs.setIf(edx&(1<<5) != 0, MCDT_NO) } + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) + // EAX + // Bit 00: XSAVEOPT is available. + // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set. + // Bit 02: Supports XGETBV with ECX = 1 if set. + // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set. + // Bits 31 - 04: Reserved. + // EBX + // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS. + // ECX + // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1. + // EDX? + // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved. + if mfi >= 0xd { + if fs.inSet(XSAVE) { + eax, _, _, _ := cpuidex(0xd, 1) + fs.setIf(eax&(1<<0) != 0, XSAVEOPT) + fs.setIf(eax&(1<<1) != 0, XSAVEC) + fs.setIf(eax&(1<<2) != 0, XGETBV1) + fs.setIf(eax&(1<<3) != 0, XSAVES) + } + } if maxExtendedFunction() >= 0x80000001 { _, _, c, d := cpuid(0x80000001) if (c & (1 << 5)) != 0 { fs.set(LZCNT) fs.set(POPCNT) } + // ECX fs.setIf((c&(1<<0)) != 0, LAHF) - fs.setIf((c&(1<<10)) != 0, IBS) - fs.setIf((d&(1<<31)) != 0, AMD3DNOW) - fs.setIf((d&(1<<30)) != 0, AMD3DNOWEXT) - fs.setIf((d&(1<<23)) != 0, MMX) - fs.setIf((d&(1<<22)) != 0, MMXEXT) + fs.setIf((c&(1<<2)) != 0, SVM) fs.setIf((c&(1<<6)) != 0, SSE4A) + fs.setIf((c&(1<<10)) != 0, IBS) + fs.setIf((c&(1<<22)) != 0, TOPEXT) + + // EDX + fs.setIf(d&(1<<11) != 0, SYSCALL) fs.setIf(d&(1<<20) != 0, NX) + fs.setIf(d&(1<<22) != 0, MMXEXT) + fs.setIf(d&(1<<23) != 0, MMX) + fs.setIf(d&(1<<24) != 0, FXSR) + fs.setIf(d&(1<<25) != 0, FXSROPT) fs.setIf(d&(1<<27) != 0, RDTSCP) + fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT) + fs.setIf(d&(1<<31) != 0, AMD3DNOW) /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be * used unless the OS has AVX support. */ if fs.inSet(AVX) { - fs.setIf((c&0x00000800) != 0, XOP) - fs.setIf((c&0x00010000) != 0, FMA4) + fs.setIf((c&(1<<11)) != 0, XOP) + fs.setIf((c&(1<<16)) != 0, FMA4) } } @@ -1085,15 +1305,48 @@ func support() flagSet { if maxExtendedFunction() >= 0x80000008 { _, b, _, _ := cpuid(0x80000008) + fs.setIf(b&(1<<28) != 0, PSFD) + fs.setIf(b&(1<<27) != 0, CPPC) + fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD) + fs.setIf(b&(1<<23) != 0, PPIN) + fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED) + fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS) + fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP) + fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED) + fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON) + fs.setIf(b&(1<<15) != 0, STIBP) + fs.setIf(b&(1<<14) != 0, IBRS) + fs.setIf((b&(1<<13)) != 0, INT_WBINVD) + fs.setIf(b&(1<<12) != 0, IBPB) fs.setIf((b&(1<<9)) != 0, WBNOINVD) fs.setIf((b&(1<<8)) != 0, MCOMMIT) - fs.setIf((b&(1<<13)) != 0, INT_WBINVD) fs.setIf((b&(1<<4)) != 0, RDPRU) fs.setIf((b&(1<<3)) != 0, INVLPGB) fs.setIf((b&(1<<1)) != 0, MSRIRC) fs.setIf((b&(1<<0)) != 0, CLZERO) } + if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A { + _, _, _, edx := cpuid(0x8000000A) + fs.setIf((edx>>0)&1 == 1, SVMNP) + fs.setIf((edx>>1)&1 == 1, LBRVIRT) + fs.setIf((edx>>2)&1 == 1, SVML) + fs.setIf((edx>>3)&1 == 1, NRIPS) + fs.setIf((edx>>4)&1 == 1, TSCRATEMSR) + fs.setIf((edx>>5)&1 == 1, VMCBCLEAN) + fs.setIf((edx>>6)&1 == 1, SVMFBASID) + fs.setIf((edx>>7)&1 == 1, SVMDA) + fs.setIf((edx>>10)&1 == 1, SVMPF) + fs.setIf((edx>>12)&1 == 1, SVMPFT) + } + + if maxExtendedFunction() >= 0x8000001a { + eax, _, _, _ := cpuid(0x8000001a) + fs.setIf((eax>>0)&1 == 1, FP128) + fs.setIf((eax>>1)&1 == 1, MOVU) + fs.setIf((eax>>2)&1 == 1, FP256) + } + if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { eax, _, _, _ := cpuid(0x8000001b) fs.setIf((eax>>0)&1 == 1, IBSFFV) @@ -1104,6 +1357,28 @@ func support() flagSet { fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) + fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE) + fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX) + fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1. + fs.setIf((eax>>11)&1 == 1, IBS_ZEN4) + } + + if maxExtendedFunction() >= 0x8000001f && vend == AMD { + a, _, _, _ := cpuid(0x8000001f) + fs.setIf((a>>0)&1 == 1, SME) + fs.setIf((a>>1)&1 == 1, SEV) + fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH) + fs.setIf((a>>3)&1 == 1, SEV_ES) + fs.setIf((a>>4)&1 == 1, SEV_SNP) + fs.setIf((a>>5)&1 == 1, VMPL) + fs.setIf((a>>10)&1 == 1, SME_COHERENT) + fs.setIf((a>>11)&1 == 1, SEV_64BIT) + fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED) + fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE) + fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP) + fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST) + fs.setIf((a>>16)&1 == 1, VTE) + fs.setIf((a>>24)&1 == 1, VMSA_REGPROT) } return fs |