From 08779c29099e8940493df56d28d8aa131ac8342e Mon Sep 17 00:00:00 2001 From: Wim Date: Thu, 9 Mar 2023 22:48:00 +0100 Subject: Update dependencies (#2007) * Update dependencies --- vendor/github.com/klauspost/cpuid/v2/README.md | 350 +++++++++++++++++++- vendor/github.com/klauspost/cpuid/v2/cpuid.go | 361 ++++++++++++++++++--- vendor/github.com/klauspost/cpuid/v2/detect_x86.go | 2 +- .../klauspost/cpuid/v2/featureid_string.go | 324 ++++++++++-------- .../klauspost/cpuid/v2/os_darwin_arm64.go | 112 ++++++- 5 files changed, 970 insertions(+), 179 deletions(-) (limited to 'vendor/github.com/klauspost/cpuid') diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md index bc2f98f0..857a93e5 100644 --- a/vendor/github.com/klauspost/cpuid/v2/README.md +++ b/vendor/github.com/klauspost/cpuid/v2/README.md @@ -16,10 +16,17 @@ Package home: https://github.com/klauspost/cpuid ## installing -`go get -u github.com/klauspost/cpuid/v2` using modules. - +`go get -u github.com/klauspost/cpuid/v2` using modules. Drop `v2` for others. +### Homebrew + +For macOS/Linux users, you can install via [brew](https://brew.sh/) + +```sh +$ brew install cpuid +``` + ## example ```Go @@ -77,10 +84,14 @@ We have Streaming SIMD 2 Extensions The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features. A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler. +To test a larger number of features, they can be combined using `f := CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)`, etc. +This can be using with `cpuid.CPU.HasAll(f)` to quickly test if all features are supported. + Note that for some cpu/os combinations some features will not be detected. `amd64` has rather good support and should work reliably on all platforms. -Note that hypervisors may not pass through all CPU features. +Note that hypervisors may not pass through all CPU features through to the guest OS, +so even if your host supports a feature it may not be visible on guests. ## arm64 feature detection @@ -132,6 +143,339 @@ func main() { } ``` +## commandline + +Download as binary from: https://github.com/klauspost/cpuid/releases + +Install from source: + +`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest` + +### Example + +``` +λ cpuid +Name: AMD Ryzen 9 3950X 16-Core Processor +Vendor String: AuthenticAMD +Vendor ID: AMD +PhysicalCores: 16 +Threads Per Core: 2 +Logical Cores: 32 +CPU Family 23 Model: 113 +Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE +Microarchitecture level: 3 +Cacheline bytes: 64 +L1 Instruction Cache: 32768 bytes +L1 Data Cache: 32768 bytes +L2 Cache: 524288 bytes +L3 Cache: 16777216 bytes + +``` +### JSON Output: + +``` +λ cpuid --json +{ + "BrandName": "AMD Ryzen 9 3950X 16-Core Processor", + "VendorID": 2, + "VendorString": "AuthenticAMD", + "PhysicalCores": 16, + "ThreadsPerCore": 2, + "LogicalCores": 32, + "Family": 23, + "Model": 113, + "CacheLine": 64, + "Hz": 0, + "BoostFreq": 0, + "Cache": { + "L1I": 32768, + "L1D": 32768, + "L2": 524288, + "L3": 16777216 + }, + "SGX": { + "Available": false, + "LaunchControl": false, + "SGX1Supported": false, + "SGX2Supported": false, + "MaxEnclaveSizeNot64": 0, + "MaxEnclaveSize64": 0, + "EPCSections": null + }, + "Features": [ + "ADX", + "AESNI", + "AVX", + "AVX2", + "BMI1", + "BMI2", + "CLMUL", + "CLZERO", + "CMOV", + "CMPXCHG8", + "CPBOOST", + "CX16", + "F16C", + "FMA3", + "FXSR", + "FXSROPT", + "HTT", + "HYPERVISOR", + "LAHF", + "LZCNT", + "MCAOVERFLOW", + "MMX", + "MMXEXT", + "MOVBE", + "NX", + "OSXSAVE", + "POPCNT", + "RDRAND", + "RDSEED", + "RDTSCP", + "SCE", + "SHA", + "SSE", + "SSE2", + "SSE3", + "SSE4", + "SSE42", + "SSE4A", + "SSSE3", + "SUCCOR", + "X87", + "XSAVE" + ], + "X64Level": 3 +} +``` + +### Check CPU microarch level + +``` +λ cpuid --check-level=3 +2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor +2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3. +Exit Code 0 + +λ cpuid --check-level=4 +2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor +2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3. +Exit Code 1 +``` + + +## Available flags + +### x86 & amd64 + +| Feature Flag | Description | +|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ADX | Intel ADX (Multi-Precision Add-Carry Instruction Extensions) | +| AESNI | Advanced Encryption Standard New Instructions | +| AMD3DNOW | AMD 3DNOW | +| AMD3DNOWEXT | AMD 3DNowExt | +| AMXBF16 | Tile computational operations on BFLOAT16 numbers | +| AMXINT8 | Tile computational operations on 8-bit integers | +| AMXFP16 | Tile computational operations on FP16 numbers | +| AMXTILE | Tile architecture | +| AVX | AVX functions | +| AVX2 | AVX2 functions | +| AVX512BF16 | AVX-512 BFLOAT16 Instructions | +| AVX512BITALG | AVX-512 Bit Algorithms | +| AVX512BW | AVX-512 Byte and Word Instructions | +| AVX512CD | AVX-512 Conflict Detection Instructions | +| AVX512DQ | AVX-512 Doubleword and Quadword Instructions | +| AVX512ER | AVX-512 Exponential and Reciprocal Instructions | +| AVX512F | AVX-512 Foundation | +| AVX512FP16 | AVX-512 FP16 Instructions | +| AVX512IFMA | AVX-512 Integer Fused Multiply-Add Instructions | +| AVX512PF | AVX-512 Prefetch Instructions | +| AVX512VBMI | AVX-512 Vector Bit Manipulation Instructions | +| AVX512VBMI2 | AVX-512 Vector Bit Manipulation Instructions, Version 2 | +| AVX512VL | AVX-512 Vector Length Extensions | +| AVX512VNNI | AVX-512 Vector Neural Network Instructions | +| AVX512VP2INTERSECT | AVX-512 Intersect for D/Q | +| AVX512VPOPCNTDQ | AVX-512 Vector Population Count Doubleword and Quadword | +| AVXIFMA | AVX-IFMA instructions | +| AVXNECONVERT | AVX-NE-CONVERT instructions | +| AVXSLOW | Indicates the CPU performs 2 128 bit operations instead of one | +| AVXVNNI | AVX (VEX encoded) VNNI neural network instructions | +| AVXVNNIINT8 | AVX-VNNI-INT8 instructions | +| BMI1 | Bit Manipulation Instruction Set 1 | +| BMI2 | Bit Manipulation Instruction Set 2 | +| CETIBT | Intel CET Indirect Branch Tracking | +| CETSS | Intel CET Shadow Stack | +| CLDEMOTE | Cache Line Demote | +| CLMUL | Carry-less Multiplication | +| CLZERO | CLZERO instruction supported | +| CMOV | i686 CMOV | +| CMPCCXADD | CMPCCXADD instructions | +| CMPSB_SCADBS_SHORT | Fast short CMPSB and SCASB | +| CMPXCHG8 | CMPXCHG8 instruction | +| CPBOOST | Core Performance Boost | +| CPPC | AMD: Collaborative Processor Performance Control | +| CX16 | CMPXCHG16B Instruction | +| EFER_LMSLE_UNS | AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ | +| ENQCMD | Enqueue Command | +| ERMS | Enhanced REP MOVSB/STOSB | +| F16C | Half-precision floating-point conversion | +| FLUSH_L1D | Flush L1D cache | +| FMA3 | Intel FMA 3. Does not imply AVX. | +| FMA4 | Bulldozer FMA4 functions | +| FP128 | AMD: When set, the internal FP/SIMD execution datapath is 128-bits wide | +| FP256 | AMD: When set, the internal FP/SIMD execution datapath is 256-bits wide | +| FSRM | Fast Short Rep Mov | +| FXSR | FXSAVE, FXRESTOR instructions, CR4 bit 9 | +| FXSROPT | FXSAVE/FXRSTOR optimizations | +| GFNI | Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. | +| HLE | Hardware Lock Elision | +| HRESET | If set CPU supports history reset and the IA32_HRESET_ENABLE MSR | +| HTT | Hyperthreading (enabled) | +| HWA | Hardware assert supported. Indicates support for MSRC001_10 | +| HYBRID_CPU | This part has CPUs of more than one type. | +| HYPERVISOR | This bit has been reserved by Intel & AMD for use by hypervisors | +| IA32_ARCH_CAP | IA32_ARCH_CAPABILITIES MSR (Intel) | +| IA32_CORE_CAP | IA32_CORE_CAPABILITIES MSR | +| IBPB | Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) | +| IBRS | AMD: Indirect Branch Restricted Speculation | +| IBRS_PREFERRED | AMD: IBRS is preferred over software solution | +| IBRS_PROVIDES_SMP | AMD: IBRS provides Same Mode Protection | +| IBS | Instruction Based Sampling (AMD) | +| IBSBRNTRGT | Instruction Based Sampling Feature (AMD) | +| IBSFETCHSAM | Instruction Based Sampling Feature (AMD) | +| IBSFFV | Instruction Based Sampling Feature (AMD) | +| IBSOPCNT | Instruction Based Sampling Feature (AMD) | +| IBSOPCNTEXT | Instruction Based Sampling Feature (AMD) | +| IBSOPSAM | Instruction Based Sampling Feature (AMD) | +| IBSRDWROPCNT | Instruction Based Sampling Feature (AMD) | +| IBSRIPINVALIDCHK | Instruction Based Sampling Feature (AMD) | +| IBS_FETCH_CTLX | AMD: IBS fetch control extended MSR supported | +| IBS_OPDATA4 | AMD: IBS op data 4 MSR supported | +| IBS_OPFUSE | AMD: Indicates support for IbsOpFuse | +| IBS_PREVENTHOST | Disallowing IBS use by the host supported | +| IBS_ZEN4 | Fetch and Op IBS support IBS extensions added with Zen4 | +| INT_WBINVD | WBINVD/WBNOINVD are interruptible. | +| INVLPGB | NVLPGB and TLBSYNC instruction supported | +| LAHF | LAHF/SAHF in long mode | +| LAM | If set, CPU supports Linear Address Masking | +| LBRVIRT | LBR virtualization | +| LZCNT | LZCNT instruction | +| MCAOVERFLOW | MCA overflow recovery support. | +| MCDT_NO | Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. | +| MCOMMIT | MCOMMIT instruction supported | +| MD_CLEAR | VERW clears CPU buffers | +| MMX | standard MMX | +| MMXEXT | SSE integer functions or AMD MMX ext | +| MOVBE | MOVBE instruction (big-endian) | +| MOVDIR64B | Move 64 Bytes as Direct Store | +| MOVDIRI | Move Doubleword as Direct Store | +| MOVSB_ZL | Fast Zero-Length MOVSB | +| MPX | Intel MPX (Memory Protection Extensions) | +| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD | +| MSRIRC | Instruction Retired Counter MSR available | +| MSR_PAGEFLUSH | Page Flush MSR available | +| NRIPS | Indicates support for NRIP save on VMEXIT | +| NX | NX (No-Execute) bit | +| OSXSAVE | XSAVE enabled by OS | +| PCONFIG | PCONFIG for Intel Multi-Key Total Memory Encryption | +| POPCNT | POPCNT instruction | +| PPIN | AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled | +| PREFETCHI | PREFETCHIT0/1 instructions | +| PSFD | AMD: Predictive Store Forward Disable | +| RDPRU | RDPRU instruction supported | +| RDRAND | RDRAND instruction is available | +| RDSEED | RDSEED instruction is available | +| RDTSCP | RDTSCP Instruction | +| RTM | Restricted Transactional Memory | +| RTM_ALWAYS_ABORT | Indicates that the loaded microcode is forcing RTM abort. | +| SERIALIZE | Serialize Instruction Execution | +| SEV | AMD Secure Encrypted Virtualization supported | +| SEV_64BIT | AMD SEV guest execution only allowed from a 64-bit host | +| SEV_ALTERNATIVE | AMD SEV Alternate Injection supported | +| SEV_DEBUGSWAP | Full debug state swap supported for SEV-ES guests | +| SEV_ES | AMD SEV Encrypted State supported | +| SEV_RESTRICTED | AMD SEV Restricted Injection supported | +| SEV_SNP | AMD SEV Secure Nested Paging supported | +| SGX | Software Guard Extensions | +| SGXLC | Software Guard Extensions Launch Control | +| SHA | Intel SHA Extensions | +| SME | AMD Secure Memory Encryption supported | +| SME_COHERENT | AMD Hardware cache coherency across encryption domains enforced | +| SPEC_CTRL_SSBD | Speculative Store Bypass Disable | +| SRBDS_CTRL | SRBDS mitigation MSR available | +| SSE | SSE functions | +| SSE2 | P4 SSE functions | +| SSE3 | Prescott SSE3 functions | +| SSE4 | Penryn SSE4.1 functions | +| SSE42 | Nehalem SSE4.2 functions | +| SSE4A | AMD Barcelona microarchitecture SSE4a instructions | +| SSSE3 | Conroe SSSE3 functions | +| STIBP | Single Thread Indirect Branch Predictors | +| STIBP_ALWAYSON | AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On | +| STOSB_SHORT | Fast short STOSB | +| SUCCOR | Software uncorrectable error containment and recovery capability. | +| SVM | AMD Secure Virtual Machine | +| SVMDA | Indicates support for the SVM decode assists. | +| SVMFBASID | SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control | +| SVML | AMD SVM lock. Indicates support for SVM-Lock. | +| SVMNP | AMD SVM nested paging | +| SVMPF | SVM pause intercept filter. Indicates support for the pause intercept filter | +| SVMPFT | SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold | +| SYSCALL | System-Call Extension (SCE): SYSCALL and SYSRET instructions. | +| SYSEE | SYSENTER and SYSEXIT instructions | +| TBM | AMD Trailing Bit Manipulation | +| TLB_FLUSH_NESTED | AMD: Flushing includes all the nested translations for guest translations | +| TME | Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. | +| TOPEXT | TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. | +| TSCRATEMSR | MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 | +| TSXLDTRK | Intel TSX Suspend Load Address Tracking | +| VAES | Vector AES. AVX(512) versions requires additional checks. | +| VMCBCLEAN | VMCB clean bits. Indicates support for VMCB clean bits. | +| VMPL | AMD VM Permission Levels supported | +| VMSA_REGPROT | AMD VMSA Register Protection supported | +| VMX | Virtual Machine Extensions | +| VPCLMULQDQ | Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. | +| VTE | AMD Virtual Transparent Encryption supported | +| WAITPKG | TPAUSE, UMONITOR, UMWAIT | +| WBNOINVD | Write Back and Do Not Invalidate Cache | +| X87 | FPU | +| XGETBV1 | Supports XGETBV with ECX = 1 | +| XOP | Bulldozer XOP functions | +| XSAVE | XSAVE, XRESTOR, XSETBV, XGETBV | +| XSAVEC | Supports XSAVEC and the compacted form of XRSTOR. | +| XSAVEOPT | XSAVEOPT available | +| XSAVES | Supports XSAVES/XRSTORS and IA32_XSS | + +# ARM features: + +| Feature Flag | Description | +|--------------|------------------------------------------------------------------| +| AESARM | AES instructions | +| ARMCPUID | Some CPU ID registers readable at user-level | +| ASIMD | Advanced SIMD | +| ASIMDDP | SIMD Dot Product | +| ASIMDHP | Advanced SIMD half-precision floating point | +| ASIMDRDM | Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) | +| ATOMICS | Large System Extensions (LSE) | +| CRC32 | CRC32/CRC32C instructions | +| DCPOP | Data cache clean to Point of Persistence (DC CVAP) | +| EVTSTRM | Generic timer | +| FCMA | Floatin point complex number addition and multiplication | +| FP | Single-precision and double-precision floating point | +| FPHP | Half-precision floating point | +| GPA | Generic Pointer Authentication | +| JSCVT | Javascript-style double->int convert (FJCVTZS) | +| LRCPC | Weaker release consistency (LDAPR, etc) | +| PMULL | Polynomial Multiply instructions (PMULL/PMULL2) | +| SHA1 | SHA-1 instructions (SHA1C, etc) | +| SHA2 | SHA-2 instructions (SHA256H, etc) | +| SHA3 | SHA-3 instructions (EOR3, RAXI, XAR, BCAX) | +| SHA512 | SHA512 instructions | +| SM3 | SM3 instructions | +| SM4 | SM4 instructions | +| SVE | Scalable Vector Extension | + # license This code is published under an MIT license. See LICENSE file for more information. diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go index 3d543ce9..cf2ae9c5 100644 --- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go +++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go @@ -14,6 +14,7 @@ import ( "flag" "fmt" "math" + "math/bits" "os" "runtime" "strings" @@ -72,6 +73,7 @@ const ( AMD3DNOW // AMD 3DNOW AMD3DNOWEXT // AMD 3DNowExt AMXBF16 // Tile computational operations on BFLOAT16 numbers + AMXFP16 // Tile computational operations on FP16 numbers AMXINT8 // Tile computational operations on 8-bit integers AMXTILE // Tile architecture AVX // AVX functions @@ -92,7 +94,11 @@ const ( AVX512VNNI // AVX-512 Vector Neural Network Instructions AVX512VP2INTERSECT // AVX-512 Intersect for D/Q AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword - AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one. + AVXIFMA // AVX-IFMA instructions + AVXNECONVERT // AVX-NE-CONVERT instructions + AVXSLOW // Indicates the CPU performs 2 128 bit operations instead of one + AVXVNNI // AVX (VEX encoded) VNNI neural network instructions + AVXVNNIINT8 // AVX-VNNI-INT8 instructions BMI1 // Bit Manipulation Instruction Set 1 BMI2 // Bit Manipulation Instruction Set 2 CETIBT // Intel CET Indirect Branch Tracking @@ -101,22 +107,37 @@ const ( CLMUL // Carry-less Multiplication CLZERO // CLZERO instruction supported CMOV // i686 CMOV + CMPCCXADD // CMPCCXADD instructions + CMPSB_SCADBS_SHORT // Fast short CMPSB and SCASB CMPXCHG8 // CMPXCHG8 instruction CPBOOST // Core Performance Boost + CPPC // AMD: Collaborative Processor Performance Control CX16 // CMPXCHG16B Instruction + EFER_LMSLE_UNS // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ ENQCMD // Enqueue Command ERMS // Enhanced REP MOVSB/STOSB F16C // Half-precision floating-point conversion + FLUSH_L1D // Flush L1D cache FMA3 // Intel FMA 3. Does not imply AVX. FMA4 // Bulldozer FMA4 functions + FP128 // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide + FP256 // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide + FSRM // Fast Short Rep Mov FXSR // FXSAVE, FXRESTOR instructions, CR4 bit 9 FXSROPT // FXSAVE/FXRSTOR optimizations - GFNI // Galois Field New Instructions + GFNI // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. HLE // Hardware Lock Elision + HRESET // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR HTT // Hyperthreading (enabled) HWA // Hardware assert supported. Indicates support for MSRC001_10 + HYBRID_CPU // This part has CPUs of more than one type. HYPERVISOR // This bit has been reserved by Intel & AMD for use by hypervisors + IA32_ARCH_CAP // IA32_ARCH_CAPABILITIES MSR (Intel) + IA32_CORE_CAP // IA32_CORE_CAPABILITIES MSR IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) + IBRS // AMD: Indirect Branch Restricted Speculation + IBRS_PREFERRED // AMD: IBRS is preferred over software solution + IBRS_PROVIDES_SMP // AMD: IBRS provides Same Mode Protection IBS // Instruction Based Sampling (AMD) IBSBRNTRGT // Instruction Based Sampling Feature (AMD) IBSFETCHSAM // Instruction Based Sampling Feature (AMD) @@ -126,33 +147,60 @@ const ( IBSOPSAM // Instruction Based Sampling Feature (AMD) IBSRDWROPCNT // Instruction Based Sampling Feature (AMD) IBSRIPINVALIDCHK // Instruction Based Sampling Feature (AMD) + IBS_FETCH_CTLX // AMD: IBS fetch control extended MSR supported + IBS_OPDATA4 // AMD: IBS op data 4 MSR supported + IBS_OPFUSE // AMD: Indicates support for IbsOpFuse + IBS_PREVENTHOST // Disallowing IBS use by the host supported + IBS_ZEN4 // AMD: Fetch and Op IBS support IBS extensions added with Zen4 INT_WBINVD // WBINVD/WBNOINVD are interruptible. INVLPGB // NVLPGB and TLBSYNC instruction supported LAHF // LAHF/SAHF in long mode + LAM // If set, CPU supports Linear Address Masking + LBRVIRT // LBR virtualization LZCNT // LZCNT instruction MCAOVERFLOW // MCA overflow recovery support. + MCDT_NO // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. MCOMMIT // MCOMMIT instruction supported + MD_CLEAR // VERW clears CPU buffers MMX // standard MMX MMXEXT // SSE integer functions or AMD MMX ext MOVBE // MOVBE instruction (big-endian) MOVDIR64B // Move 64 Bytes as Direct Store MOVDIRI // Move Doubleword as Direct Store + MOVSB_ZL // Fast Zero-Length MOVSB + MOVU // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD MPX // Intel MPX (Memory Protection Extensions) MSRIRC // Instruction Retired Counter MSR available + MSR_PAGEFLUSH // Page Flush MSR available + NRIPS // Indicates support for NRIP save on VMEXIT NX // NX (No-Execute) bit OSXSAVE // XSAVE enabled by OS + PCONFIG // PCONFIG for Intel Multi-Key Total Memory Encryption POPCNT // POPCNT instruction + PPIN // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled + PREFETCHI // PREFETCHIT0/1 instructions + PSFD // AMD: Predictive Store Forward Disable RDPRU // RDPRU instruction supported RDRAND // RDRAND instruction is available RDSEED // RDSEED instruction is available RDTSCP // RDTSCP Instruction RTM // Restricted Transactional Memory RTM_ALWAYS_ABORT // Indicates that the loaded microcode is forcing RTM abort. - SCE // SYSENTER and SYSEXIT instructions SERIALIZE // Serialize Instruction Execution + SEV // AMD Secure Encrypted Virtualization supported + SEV_64BIT // AMD SEV guest execution only allowed from a 64-bit host + SEV_ALTERNATIVE // AMD SEV Alternate Injection supported + SEV_DEBUGSWAP // Full debug state swap supported for SEV-ES guests + SEV_ES // AMD SEV Encrypted State supported + SEV_RESTRICTED // AMD SEV Restricted Injection supported + SEV_SNP // AMD SEV Secure Nested Paging supported SGX // Software Guard Extensions SGXLC // Software Guard Extensions Launch Control SHA // Intel SHA Extensions + SME // AMD Secure Memory Encryption supported + SME_COHERENT // AMD Hardware cache coherency across encryption domains enforced + SPEC_CTRL_SSBD // Speculative Store Bypass Disable + SRBDS_CTRL // SRBDS mitigation MSR available SSE // SSE functions SSE2 // P4 SSE functions SSE3 // Prescott SSE3 functions @@ -161,17 +209,40 @@ const ( SSE4A // AMD Barcelona microarchitecture SSE4a instructions SSSE3 // Conroe SSSE3 functions STIBP // Single Thread Indirect Branch Predictors + STIBP_ALWAYSON // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On + STOSB_SHORT // Fast short STOSB SUCCOR // Software uncorrectable error containment and recovery capability. + SVM // AMD Secure Virtual Machine + SVMDA // Indicates support for the SVM decode assists. + SVMFBASID // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control + SVML // AMD SVM lock. Indicates support for SVM-Lock. + SVMNP // AMD SVM nested paging + SVMPF // SVM pause intercept filter. Indicates support for the pause intercept filter + SVMPFT // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold + SYSCALL // System-Call Extension (SCE): SYSCALL and SYSRET instructions. + SYSEE // SYSENTER and SYSEXIT instructions TBM // AMD Trailing Bit Manipulation + TLB_FLUSH_NESTED // AMD: Flushing includes all the nested translations for guest translations + TME // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. + TOPEXT // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. + TSCRATEMSR // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 TSXLDTRK // Intel TSX Suspend Load Address Tracking - VAES // Vector AES + VAES // Vector AES. AVX(512) versions requires additional checks. + VMCBCLEAN // VMCB clean bits. Indicates support for VMCB clean bits. + VMPL // AMD VM Permission Levels supported + VMSA_REGPROT // AMD VMSA Register Protection supported VMX // Virtual Machine Extensions - VPCLMULQDQ // Carry-Less Multiplication Quadword + VPCLMULQDQ // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. + VTE // AMD Virtual Transparent Encryption supported WAITPKG // TPAUSE, UMONITOR, UMWAIT WBNOINVD // Write Back and Do Not Invalidate Cache X87 // FPU + XGETBV1 // Supports XGETBV with ECX = 1 XOP // Bulldozer XOP functions XSAVE // XSAVE, XRESTOR, XSETBV, XGETBV + XSAVEC // Supports XSAVEC and the compacted form of XRSTOR. + XSAVEOPT // XSAVEOPT available + XSAVES // Supports XSAVES/XRSTORS and IA32_XSS // ARM features: AESARM // AES instructions @@ -198,7 +269,6 @@ const ( SM3 // SM3 instructions SM4 // SM4 instructions SVE // Scalable Vector Extension - // Keep it last. It automatically defines the size of []flagSet lastID @@ -216,6 +286,7 @@ type CPUInfo struct { LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. Family int // CPU family number Model int // CPU model number + Stepping int // CPU stepping info CacheLine int // Cache line size in bytes. Will be 0 if undetectable. Hz int64 // Clock speed, if known, 0 otherwise. Will attempt to contain base clock speed. BoostFreq int64 // Max clock speed, if known, 0 otherwise @@ -318,30 +389,61 @@ func (c CPUInfo) Supports(ids ...FeatureID) bool { // Has allows for checking a single feature. // Should be inlined by the compiler. -func (c CPUInfo) Has(id FeatureID) bool { +func (c *CPUInfo) Has(id FeatureID) bool { return c.featureSet.inSet(id) } +// AnyOf returns whether the CPU supports one or more of the requested features. +func (c CPUInfo) AnyOf(ids ...FeatureID) bool { + for _, id := range ids { + if c.featureSet.inSet(id) { + return true + } + } + return false +} + +// Features contains several features combined for a fast check using +// CpuInfo.HasAll +type Features *flagSet + +// CombineFeatures allows to combine several features for a close to constant time lookup. +func CombineFeatures(ids ...FeatureID) Features { + var v flagSet + for _, id := range ids { + v.set(id) + } + return &v +} + +func (c *CPUInfo) HasAll(f Features) bool { + return c.featureSet.hasSetP(f) +} + // https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels -var level1Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2) -var level2Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) -var level3Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) -var level4Features = flagSetWith(CMOV, CMPXCHG8, X87, FXSR, MMX, SCE, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) +var oneOfLevel = CombineFeatures(SYSEE, SYSCALL) +var level1Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2) +var level2Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3) +var level3Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE) +var level4Features = CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SSE, SSE2, CX16, LAHF, POPCNT, SSE3, SSE4, SSE42, SSSE3, AVX, AVX2, BMI1, BMI2, F16C, FMA3, LZCNT, MOVBE, OSXSAVE, AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL) // X64Level returns the microarchitecture level detected on the CPU. // If features are lacking or non x64 mode, 0 is returned. // See https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels func (c CPUInfo) X64Level() int { - if c.featureSet.hasSet(level4Features) { + if !c.featureSet.hasOneOf(oneOfLevel) { + return 0 + } + if c.featureSet.hasSetP(level4Features) { return 4 } - if c.featureSet.hasSet(level3Features) { + if c.featureSet.hasSetP(level3Features) { return 3 } - if c.featureSet.hasSet(level2Features) { + if c.featureSet.hasSetP(level2Features) { return 2 } - if c.featureSet.hasSet(level1Features) { + if c.featureSet.hasSetP(level1Features) { return 1 } return 0 @@ -369,8 +471,9 @@ func (c CPUInfo) IsVendor(v Vendor) bool { return c.VendorID == v } +// FeatureSet returns all available features as strings. func (c CPUInfo) FeatureSet() []string { - s := make([]string, 0) + s := make([]string, 0, c.featureSet.nEnabled()) s = append(s, c.featureSet.Strings()...) return s } @@ -504,7 +607,7 @@ const flagMask = flagBits - 1 // flagSet contains detected cpu features and characteristics in an array of flags type flagSet [(lastID + flagMask) / flagBits]flags -func (s flagSet) inSet(feat FeatureID) bool { +func (s *flagSet) inSet(feat FeatureID) bool { return s[feat>>flagBitsLog2]&(1<<(feat&flagMask)) != 0 } @@ -534,7 +637,7 @@ func (s *flagSet) or(other flagSet) { } // hasSet returns whether all features are present. -func (s flagSet) hasSet(other flagSet) bool { +func (s *flagSet) hasSet(other flagSet) bool { for i, v := range other[:] { if s[i]&v != v { return false @@ -543,6 +646,34 @@ func (s flagSet) hasSet(other flagSet) bool { return true } +// hasSet returns whether all features are present. +func (s *flagSet) hasSetP(other *flagSet) bool { + for i, v := range other[:] { + if s[i]&v != v { + return false + } + } + return true +} + +// hasOneOf returns whether one or more features are present. +func (s *flagSet) hasOneOf(other *flagSet) bool { + for i, v := range other[:] { + if s[i]&v != 0 { + return true + } + } + return false +} + +// nEnabled will return the number of enabled flags. +func (s *flagSet) nEnabled() (n int) { + for _, v := range s[:] { + n += bits.OnesCount64(uint64(v)) + } + return n +} + func flagSetWith(feat ...FeatureID) flagSet { var res flagSet for _, f := range feat { @@ -631,7 +762,7 @@ func threadsPerCore() int { if vend == AMD { // Workaround for AMD returning 0, assume 2 if >= Zen 2 // It will be more correct than not. - fam, _ := familyModel() + fam, _, _ := familyModel() _, _, _, d := cpuid(1) if (d&(1<<28)) != 0 && fam >= 23 { return 2 @@ -669,14 +800,27 @@ func logicalCores() int { } } -func familyModel() (int, int) { +func familyModel() (family, model, stepping int) { if maxFunctionID() < 0x1 { - return 0, 0 + return 0, 0, 0 } eax, _, _, _ := cpuid(1) - family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) - model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) - return int(family), int(model) + // If BaseFamily[3:0] is less than Fh then ExtendedFamily[7:0] is reserved and Family is equal to BaseFamily[3:0]. + family = int((eax >> 8) & 0xf) + extFam := family == 0x6 // Intel is 0x6, needs extended model. + if family == 0xf { + // Add ExtFamily + family += int((eax >> 20) & 0xff) + extFam = true + } + // If BaseFamily[3:0] is less than 0Fh then ExtendedModel[3:0] is reserved and Model is equal to BaseModel[3:0]. + model = int((eax >> 4) & 0xf) + if extFam { + // Add ExtModel + model += int((eax >> 12) & 0xf0) + } + stepping = int(eax & 0xf) + return family, model, stepping } func physicalCores() int { @@ -811,9 +955,14 @@ func (c *CPUInfo) cacheSize() { c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties - if maxExtendedFunction() < 0x8000001D { + if maxExtendedFunction() < 0x8000001D || !c.Has(TOPEXT) { return } + + // Xen Hypervisor is buggy and returns the same entry no matter ECX value. + // Hack: When we encounter the same entry 100 times we break. + nSame := 0 + var last uint32 for i := uint32(0); i < math.MaxUint32; i++ { eax, ebx, ecx, _ := cpuidex(0x8000001D, i) @@ -829,6 +978,16 @@ func (c *CPUInfo) cacheSize() { return } + // Check for the same value repeated. + comb := eax ^ ebx ^ ecx + if comb == last { + nSame++ + if nSame == 100 { + return + } + } + last = comb + switch level { case 1: switch typ { @@ -913,14 +1072,13 @@ func support() flagSet { if mfi < 0x1 { return fs } - family, model := familyModel() + family, model, _ := familyModel() _, _, c, d := cpuid(1) fs.setIf((d&(1<<0)) != 0, X87) fs.setIf((d&(1<<8)) != 0, CMPXCHG8) - fs.setIf((d&(1<<11)) != 0, SCE) + fs.setIf((d&(1<<11)) != 0, SYSEE) fs.setIf((d&(1<<15)) != 0, CMOV) - fs.setIf((d&(1<<22)) != 0, MMXEXT) fs.setIf((d&(1<<23)) != 0, MMX) fs.setIf((d&(1<<24)) != 0, FXSR) fs.setIf((d&(1<<25)) != 0, FXSROPT) @@ -928,9 +1086,9 @@ func support() flagSet { fs.setIf((d&(1<<26)) != 0, SSE2) fs.setIf((c&1) != 0, SSE3) fs.setIf((c&(1<<5)) != 0, VMX) - fs.setIf((c&0x00000200) != 0, SSSE3) - fs.setIf((c&0x00080000) != 0, SSE4) - fs.setIf((c&0x00100000) != 0, SSE42) + fs.setIf((c&(1<<9)) != 0, SSSE3) + fs.setIf((c&(1<<19)) != 0, SSE4) + fs.setIf((c&(1<<20)) != 0, SSE42) fs.setIf((c&(1<<25)) != 0, AESNI) fs.setIf((c&(1<<1)) != 0, CLMUL) fs.setIf(c&(1<<22) != 0, MOVBE) @@ -976,7 +1134,6 @@ func support() flagSet { // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. if mfi >= 7 { _, ebx, ecx, edx := cpuidex(7, 0) - eax1, _, _, _ := cpuidex(7, 1) if fs.inSet(AVX) && (ebx&0x00000020) != 0 { fs.set(AVX2) } @@ -993,21 +1150,52 @@ func support() flagSet { fs.setIf(ebx&(1<<18) != 0, RDSEED) fs.setIf(ebx&(1<<19) != 0, ADX) fs.setIf(ebx&(1<<29) != 0, SHA) + // CPUID.(EAX=7, ECX=0).ECX fs.setIf(ecx&(1<<5) != 0, WAITPKG) fs.setIf(ecx&(1<<7) != 0, CETSS) + fs.setIf(ecx&(1<<8) != 0, GFNI) + fs.setIf(ecx&(1<<9) != 0, VAES) + fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) + fs.setIf(ecx&(1<<13) != 0, TME) fs.setIf(ecx&(1<<25) != 0, CLDEMOTE) fs.setIf(ecx&(1<<27) != 0, MOVDIRI) fs.setIf(ecx&(1<<28) != 0, MOVDIR64B) fs.setIf(ecx&(1<<29) != 0, ENQCMD) fs.setIf(ecx&(1<<30) != 0, SGXLC) + // CPUID.(EAX=7, ECX=0).EDX + fs.setIf(edx&(1<<4) != 0, FSRM) + fs.setIf(edx&(1<<9) != 0, SRBDS_CTRL) + fs.setIf(edx&(1<<10) != 0, MD_CLEAR) fs.setIf(edx&(1<<11) != 0, RTM_ALWAYS_ABORT) fs.setIf(edx&(1<<14) != 0, SERIALIZE) + fs.setIf(edx&(1<<15) != 0, HYBRID_CPU) fs.setIf(edx&(1<<16) != 0, TSXLDTRK) + fs.setIf(edx&(1<<18) != 0, PCONFIG) fs.setIf(edx&(1<<20) != 0, CETIBT) fs.setIf(edx&(1<<26) != 0, IBPB) fs.setIf(edx&(1<<27) != 0, STIBP) + fs.setIf(edx&(1<<28) != 0, FLUSH_L1D) + fs.setIf(edx&(1<<29) != 0, IA32_ARCH_CAP) + fs.setIf(edx&(1<<30) != 0, IA32_CORE_CAP) + fs.setIf(edx&(1<<31) != 0, SPEC_CTRL_SSBD) + + // CPUID.(EAX=7, ECX=1).EDX + fs.setIf(edx&(1<<4) != 0, AVXVNNIINT8) + fs.setIf(edx&(1<<5) != 0, AVXNECONVERT) + fs.setIf(edx&(1<<14) != 0, PREFETCHI) + + // CPUID.(EAX=7, ECX=1).EAX + eax1, _, _, _ := cpuidex(7, 1) + fs.setIf(fs.inSet(AVX) && eax1&(1<<4) != 0, AVXVNNI) + fs.setIf(eax1&(1<<7) != 0, CMPCCXADD) + fs.setIf(eax1&(1<<10) != 0, MOVSB_ZL) + fs.setIf(eax1&(1<<11) != 0, STOSB_SHORT) + fs.setIf(eax1&(1<<12) != 0, CMPSB_SCADBS_SHORT) + fs.setIf(eax1&(1<<22) != 0, HRESET) + fs.setIf(eax1&(1<<23) != 0, AVXIFMA) + fs.setIf(eax1&(1<<26) != 0, LAM) // Only detect AVX-512 features if XGETBV is supported if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { @@ -1033,9 +1221,6 @@ func support() flagSet { // ecx fs.setIf(ecx&(1<<1) != 0, AVX512VBMI) fs.setIf(ecx&(1<<6) != 0, AVX512VBMI2) - fs.setIf(ecx&(1<<8) != 0, GFNI) - fs.setIf(ecx&(1<<9) != 0, VAES) - fs.setIf(ecx&(1<<10) != 0, VPCLMULQDQ) fs.setIf(ecx&(1<<11) != 0, AVX512VNNI) fs.setIf(ecx&(1<<12) != 0, AVX512BITALG) fs.setIf(ecx&(1<<14) != 0, AVX512VPOPCNTDQ) @@ -1047,31 +1232,66 @@ func support() flagSet { fs.setIf(edx&(1<<25) != 0, AMXINT8) // eax1 = CPUID.(EAX=7, ECX=1).EAX fs.setIf(eax1&(1<<5) != 0, AVX512BF16) + fs.setIf(eax1&(1<<21) != 0, AMXFP16) } } + + // CPUID.(EAX=7, ECX=2) + _, _, _, edx = cpuidex(7, 2) + fs.setIf(edx&(1<<5) != 0, MCDT_NO) } + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, ECX = 1) + // EAX + // Bit 00: XSAVEOPT is available. + // Bit 01: Supports XSAVEC and the compacted form of XRSTOR if set. + // Bit 02: Supports XGETBV with ECX = 1 if set. + // Bit 03: Supports XSAVES/XRSTORS and IA32_XSS if set. + // Bits 31 - 04: Reserved. + // EBX + // Bits 31 - 00: The size in bytes of the XSAVE area containing all states enabled by XCRO | IA32_XSS. + // ECX + // Bits 31 - 00: Reports the supported bits of the lower 32 bits of the IA32_XSS MSR. IA32_XSS[n] can be set to 1 only if ECX[n] is 1. + // EDX? + // Bits 07 - 00: Used for XCR0. Bit 08: PT state. Bit 09: Used for XCR0. Bits 12 - 10: Reserved. Bit 13: HWP state. Bits 31 - 14: Reserved. + if mfi >= 0xd { + if fs.inSet(XSAVE) { + eax, _, _, _ := cpuidex(0xd, 1) + fs.setIf(eax&(1<<0) != 0, XSAVEOPT) + fs.setIf(eax&(1<<1) != 0, XSAVEC) + fs.setIf(eax&(1<<2) != 0, XGETBV1) + fs.setIf(eax&(1<<3) != 0, XSAVES) + } + } if maxExtendedFunction() >= 0x80000001 { _, _, c, d := cpuid(0x80000001) if (c & (1 << 5)) != 0 { fs.set(LZCNT) fs.set(POPCNT) } + // ECX fs.setIf((c&(1<<0)) != 0, LAHF) - fs.setIf((c&(1<<10)) != 0, IBS) - fs.setIf((d&(1<<31)) != 0, AMD3DNOW) - fs.setIf((d&(1<<30)) != 0, AMD3DNOWEXT) - fs.setIf((d&(1<<23)) != 0, MMX) - fs.setIf((d&(1<<22)) != 0, MMXEXT) + fs.setIf((c&(1<<2)) != 0, SVM) fs.setIf((c&(1<<6)) != 0, SSE4A) + fs.setIf((c&(1<<10)) != 0, IBS) + fs.setIf((c&(1<<22)) != 0, TOPEXT) + + // EDX + fs.setIf(d&(1<<11) != 0, SYSCALL) fs.setIf(d&(1<<20) != 0, NX) + fs.setIf(d&(1<<22) != 0, MMXEXT) + fs.setIf(d&(1<<23) != 0, MMX) + fs.setIf(d&(1<<24) != 0, FXSR) + fs.setIf(d&(1<<25) != 0, FXSROPT) fs.setIf(d&(1<<27) != 0, RDTSCP) + fs.setIf(d&(1<<30) != 0, AMD3DNOWEXT) + fs.setIf(d&(1<<31) != 0, AMD3DNOW) /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be * used unless the OS has AVX support. */ if fs.inSet(AVX) { - fs.setIf((c&0x00000800) != 0, XOP) - fs.setIf((c&0x00010000) != 0, FMA4) + fs.setIf((c&(1<<11)) != 0, XOP) + fs.setIf((c&(1<<16)) != 0, FMA4) } } @@ -1085,15 +1305,48 @@ func support() flagSet { if maxExtendedFunction() >= 0x80000008 { _, b, _, _ := cpuid(0x80000008) + fs.setIf(b&(1<<28) != 0, PSFD) + fs.setIf(b&(1<<27) != 0, CPPC) + fs.setIf(b&(1<<24) != 0, SPEC_CTRL_SSBD) + fs.setIf(b&(1<<23) != 0, PPIN) + fs.setIf(b&(1<<21) != 0, TLB_FLUSH_NESTED) + fs.setIf(b&(1<<20) != 0, EFER_LMSLE_UNS) + fs.setIf(b&(1<<19) != 0, IBRS_PROVIDES_SMP) + fs.setIf(b&(1<<18) != 0, IBRS_PREFERRED) + fs.setIf(b&(1<<17) != 0, STIBP_ALWAYSON) + fs.setIf(b&(1<<15) != 0, STIBP) + fs.setIf(b&(1<<14) != 0, IBRS) + fs.setIf((b&(1<<13)) != 0, INT_WBINVD) + fs.setIf(b&(1<<12) != 0, IBPB) fs.setIf((b&(1<<9)) != 0, WBNOINVD) fs.setIf((b&(1<<8)) != 0, MCOMMIT) - fs.setIf((b&(1<<13)) != 0, INT_WBINVD) fs.setIf((b&(1<<4)) != 0, RDPRU) fs.setIf((b&(1<<3)) != 0, INVLPGB) fs.setIf((b&(1<<1)) != 0, MSRIRC) fs.setIf((b&(1<<0)) != 0, CLZERO) } + if fs.inSet(SVM) && maxExtendedFunction() >= 0x8000000A { + _, _, _, edx := cpuid(0x8000000A) + fs.setIf((edx>>0)&1 == 1, SVMNP) + fs.setIf((edx>>1)&1 == 1, LBRVIRT) + fs.setIf((edx>>2)&1 == 1, SVML) + fs.setIf((edx>>3)&1 == 1, NRIPS) + fs.setIf((edx>>4)&1 == 1, TSCRATEMSR) + fs.setIf((edx>>5)&1 == 1, VMCBCLEAN) + fs.setIf((edx>>6)&1 == 1, SVMFBASID) + fs.setIf((edx>>7)&1 == 1, SVMDA) + fs.setIf((edx>>10)&1 == 1, SVMPF) + fs.setIf((edx>>12)&1 == 1, SVMPFT) + } + + if maxExtendedFunction() >= 0x8000001a { + eax, _, _, _ := cpuid(0x8000001a) + fs.setIf((eax>>0)&1 == 1, FP128) + fs.setIf((eax>>1)&1 == 1, MOVU) + fs.setIf((eax>>2)&1 == 1, FP256) + } + if maxExtendedFunction() >= 0x8000001b && fs.inSet(IBS) { eax, _, _, _ := cpuid(0x8000001b) fs.setIf((eax>>0)&1 == 1, IBSFFV) @@ -1104,6 +1357,28 @@ func support() flagSet { fs.setIf((eax>>5)&1 == 1, IBSBRNTRGT) fs.setIf((eax>>6)&1 == 1, IBSOPCNTEXT) fs.setIf((eax>>7)&1 == 1, IBSRIPINVALIDCHK) + fs.setIf((eax>>8)&1 == 1, IBS_OPFUSE) + fs.setIf((eax>>9)&1 == 1, IBS_FETCH_CTLX) + fs.setIf((eax>>10)&1 == 1, IBS_OPDATA4) // Doc says "Fixed,0. IBS op data 4 MSR supported", but assuming they mean 1. + fs.setIf((eax>>11)&1 == 1, IBS_ZEN4) + } + + if maxExtendedFunction() >= 0x8000001f && vend == AMD { + a, _, _, _ := cpuid(0x8000001f) + fs.setIf((a>>0)&1 == 1, SME) + fs.setIf((a>>1)&1 == 1, SEV) + fs.setIf((a>>2)&1 == 1, MSR_PAGEFLUSH) + fs.setIf((a>>3)&1 == 1, SEV_ES) + fs.setIf((a>>4)&1 == 1, SEV_SNP) + fs.setIf((a>>5)&1 == 1, VMPL) + fs.setIf((a>>10)&1 == 1, SME_COHERENT) + fs.setIf((a>>11)&1 == 1, SEV_64BIT) + fs.setIf((a>>12)&1 == 1, SEV_RESTRICTED) + fs.setIf((a>>13)&1 == 1, SEV_ALTERNATIVE) + fs.setIf((a>>14)&1 == 1, SEV_DEBUGSWAP) + fs.setIf((a>>15)&1 == 1, IBS_PREVENTHOST) + fs.setIf((a>>16)&1 == 1, VTE) + fs.setIf((a>>24)&1 == 1, VMSA_REGPROT) } return fs diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go index 35678d8a..c946824e 100644 --- a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go +++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go @@ -24,7 +24,7 @@ func addInfo(c *CPUInfo, safe bool) { c.maxExFunc = maxExtendedFunction() c.BrandName = brandName() c.CacheLine = cacheLine() - c.Family, c.Model = familyModel() + c.Family, c.Model, c.Stepping = familyModel() c.featureSet = support() c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC)) c.ThreadsPerCore = threadsPerCore() diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go index 02fe232a..8b6cd2b7 100644 --- a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go +++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go @@ -13,137 +13,207 @@ func _() { _ = x[AMD3DNOW-3] _ = x[AMD3DNOWEXT-4] _ = x[AMXBF16-5] - _ = x[AMXINT8-6] - _ = x[AMXTILE-7] - _ = x[AVX-8] - _ = x[AVX2-9] - _ = x[AVX512BF16-10] - _ = x[AVX512BITALG-11] - _ = x[AVX512BW-12] - _ = x[AVX512CD-13] - _ = x[AVX512DQ-14] - _ = x[AVX512ER-15] - _ = x[AVX512F-16] - _ = x[AVX512FP16-17] - _ = x[AVX512IFMA-18] - _ = x[AVX512PF-19] - _ = x[AVX512VBMI-20] - _ = x[AVX512VBMI2-21] - _ = x[AVX512VL-22] - _ = x[AVX512VNNI-23] - _ = x[AVX512VP2INTERSECT-24] - _ = x[AVX512VPOPCNTDQ-25] - _ = x[AVXSLOW-26] - _ = x[BMI1-27] - _ = x[BMI2-28] - _ = x[CETIBT-29] - _ = x[CETSS-30] - _ = x[CLDEMOTE-31] - _ = x[CLMUL-32] - _ = x[CLZERO-33] - _ = x[CMOV-34] - _ = x[CMPXCHG8-35] - _ = x[CPBOOST-36] - _ = x[CX16-37] - _ = x[ENQCMD-38] - _ = x[ERMS-39] - _ = x[F16C-40] - _ = x[FMA3-41] - _ = x[FMA4-42] - _ = x[FXSR-43] - _ = x[FXSROPT-44] - _ = x[GFNI-45] - _ = x[HLE-46] - _ = x[HTT-47] - _ = x[HWA-48] - _ = x[HYPERVISOR-49] - _ = x[IBPB-50] - _ = x[IBS-51] - _ = x[IBSBRNTRGT-52] - _ = x[IBSFETCHSAM-53] - _ = x[IBSFFV-54] - _ = x[IBSOPCNT-55] - _ = x[IBSOPCNTEXT-56] - _ = x[IBSOPSAM-57] - _ = x[IBSRDWROPCNT-58] - _ = x[IBSRIPINVALIDCHK-59] - _ = x[INT_WBINVD-60] - _ = x[INVLPGB-61] - _ = x[LAHF-62] - _ = x[LZCNT-63] - _ = x[MCAOVERFLOW-64] - _ = x[MCOMMIT-65] - _ = x[MMX-66] - _ = x[MMXEXT-67] - _ = x[MOVBE-68] - _ = x[MOVDIR64B-69] - _ = x[MOVDIRI-70] - _ = x[MPX-71] - _ = x[MSRIRC-72] - _ = x[NX-73] - _ = x[OSXSAVE-74] - _ = x[POPCNT-75] - _ = x[RDPRU-76] - _ = x[RDRAND-77] - _ = x[RDSEED-78] - _ = x[RDTSCP-79] - _ = x[RTM-80] - _ = x[RTM_ALWAYS_ABORT-81] - _ = x[SCE-82] - _ = x[SERIALIZE-83] - _ = x[SGX-84] - _ = x[SGXLC-85] - _ = x[SHA-86] - _ = x[SSE-87] - _ = x[SSE2-88] - _ = x[SSE3-89] - _ = x[SSE4-90] - _ = x[SSE42-91] - _ = x[SSE4A-92] - _ = x[SSSE3-93] - _ = x[STIBP-94] - _ = x[SUCCOR-95] - _ = x[TBM-96] - _ = x[TSXLDTRK-97] - _ = x[VAES-98] - _ = x[VMX-99] - _ = x[VPCLMULQDQ-100] - _ = x[WAITPKG-101] - _ = x[WBNOINVD-102] - _ = x[X87-103] - _ = x[XOP-104] - _ = x[XSAVE-105] - _ = x[AESARM-106] - _ = x[ARMCPUID-107] - _ = x[ASIMD-108] - _ = x[ASIMDDP-109] - _ = x[ASIMDHP-110] - _ = x[ASIMDRDM-111] - _ = x[ATOMICS-112] - _ = x[CRC32-113] - _ = x[DCPOP-114] - _ = x[EVTSTRM-115] - _ = x[FCMA-116] - _ = x[FP-117] - _ = x[FPHP-118] - _ = x[GPA-119] - _ = x[JSCVT-120] - _ = x[LRCPC-121] - _ = x[PMULL-122] - _ = x[SHA1-123] - _ = x[SHA2-124] - _ = x[SHA3-125] - _ = x[SHA512-126] - _ = x[SM3-127] - _ = x[SM4-128] - _ = x[SVE-129] - _ = x[lastID-130] + _ = x[AMXFP16-6] + _ = x[AMXINT8-7] + _ = x[AMXTILE-8] + _ = x[AVX-9] + _ = x[AVX2-10] + _ = x[AVX512BF16-11] + _ = x[AVX512BITALG-12] + _ = x[AVX512BW-13] + _ = x[AVX512CD-14] + _ = x[AVX512DQ-15] + _ = x[AVX512ER-16] + _ = x[AVX512F-17] + _ = x[AVX512FP16-18] + _ = x[AVX512IFMA-19] + _ = x[AVX512PF-20] + _ = x[AVX512VBMI-21] + _ = x[AVX512VBMI2-22] + _ = x[AVX512VL-23] + _ = x[AVX512VNNI-24] + _ = x[AVX512VP2INTERSECT-25] + _ = x[AVX512VPOPCNTDQ-26] + _ = x[AVXIFMA-27] + _ = x[AVXNECONVERT-28] + _ = x[AVXSLOW-29] + _ = x[AVXVNNI-30] + _ = x[AVXVNNIINT8-31] + _ = x[BMI1-32] + _ = x[BMI2-33] + _ = x[CETIBT-34] + _ = x[CETSS-35] + _ = x[CLDEMOTE-36] + _ = x[CLMUL-37] + _ = x[CLZERO-38] + _ = x[CMOV-39] + _ = x[CMPCCXADD-40] + _ = x[CMPSB_SCADBS_SHORT-41] + _ = x[CMPXCHG8-42] + _ = x[CPBOOST-43] + _ = x[CPPC-44] + _ = x[CX16-45] + _ = x[EFER_LMSLE_UNS-46] + _ = x[ENQCMD-47] + _ = x[ERMS-48] + _ = x[F16C-49] + _ = x[FLUSH_L1D-50] + _ = x[FMA3-51] + _ = x[FMA4-52] + _ = x[FP128-53] + _ = x[FP256-54] + _ = x[FSRM-55] + _ = x[FXSR-56] + _ = x[FXSROPT-57] + _ = x[GFNI-58] + _ = x[HLE-59] + _ = x[HRESET-60] + _ = x[HTT-61] + _ = x[HWA-62] + _ = x[HYBRID_CPU-63] + _ = x[HYPERVISOR-64] + _ = x[IA32_ARCH_CAP-65] + _ = x[IA32_CORE_CAP-66] + _ = x[IBPB-67] + _ = x[IBRS-68] + _ = x[IBRS_PREFERRED-69] + _ = x[IBRS_PROVIDES_SMP-70] + _ = x[IBS-71] + _ = x[IBSBRNTRGT-72] + _ = x[IBSFETCHSAM-73] + _ = x[IBSFFV-74] + _ = x[IBSOPCNT-75] + _ = x[IBSOPCNTEXT-76] + _ = x[IBSOPSAM-77] + _ = x[IBSRDWROPCNT-78] + _ = x[IBSRIPINVALIDCHK-79] + _ = x[IBS_FETCH_CTLX-80] + _ = x[IBS_OPDATA4-81] + _ = x[IBS_OPFUSE-82] + _ = x[IBS_PREVENTHOST-83] + _ = x[IBS_ZEN4-84] + _ = x[INT_WBINVD-85] + _ = x[INVLPGB-86] + _ = x[LAHF-87] + _ = x[LAM-88] + _ = x[LBRVIRT-89] + _ = x[LZCNT-90] + _ = x[MCAOVERFLOW-91] + _ = x[MCDT_NO-92] + _ = x[MCOMMIT-93] + _ = x[MD_CLEAR-94] + _ = x[MMX-95] + _ = x[MMXEXT-96] + _ = x[MOVBE-97] + _ = x[MOVDIR64B-98] + _ = x[MOVDIRI-99] + _ = x[MOVSB_ZL-100] + _ = x[MOVU-101] + _ = x[MPX-102] + _ = x[MSRIRC-103] + _ = x[MSR_PAGEFLUSH-104] + _ = x[NRIPS-105] + _ = x[NX-106] + _ = x[OSXSAVE-107] + _ = x[PCONFIG-108] + _ = x[POPCNT-109] + _ = x[PPIN-110] + _ = x[PREFETCHI-111] + _ = x[PSFD-112] + _ = x[RDPRU-113] + _ = x[RDRAND-114] + _ = x[RDSEED-115] + _ = x[RDTSCP-116] + _ = x[RTM-117] + _ = x[RTM_ALWAYS_ABORT-118] + _ = x[SERIALIZE-119] + _ = x[SEV-120] + _ = x[SEV_64BIT-121] + _ = x[SEV_ALTERNATIVE-122] + _ = x[SEV_DEBUGSWAP-123] + _ = x[SEV_ES-124] + _ = x[SEV_RESTRICTED-125] + _ = x[SEV_SNP-126] + _ = x[SGX-127] + _ = x[SGXLC-128] + _ = x[SHA-129] + _ = x[SME-130] + _ = x[SME_COHERENT-131] + _ = x[SPEC_CTRL_SSBD-132] + _ = x[SRBDS_CTRL-133] + _ = x[SSE-134] + _ = x[SSE2-135] + _ = x[SSE3-136] + _ = x[SSE4-137] + _ = x[SSE42-138] + _ = x[SSE4A-139] + _ = x[SSSE3-140] + _ = x[STIBP-141] + _ = x[STIBP_ALWAYSON-142] + _ = x[STOSB_SHORT-143] + _ = x[SUCCOR-144] + _ = x[SVM-145] + _ = x[SVMDA-146] + _ = x[SVMFBASID-147] + _ = x[SVML-148] + _ = x[SVMNP-149] + _ = x[SVMPF-150] + _ = x[SVMPFT-151] + _ = x[SYSCALL-152] + _ = x[SYSEE-153] + _ = x[TBM-154] + _ = x[TLB_FLUSH_NESTED-155] + _ = x[TME-156] + _ = x[TOPEXT-157] + _ = x[TSCRATEMSR-158] + _ = x[TSXLDTRK-159] + _ = x[VAES-160] + _ = x[VMCBCLEAN-161] + _ = x[VMPL-162] + _ = x[VMSA_REGPROT-163] + _ = x[VMX-164] + _ = x[VPCLMULQDQ-165] + _ = x[VTE-166] + _ = x[WAITPKG-167] + _ = x[WBNOINVD-168] + _ = x[X87-169] + _ = x[XGETBV1-170] + _ = x[XOP-171] + _ = x[XSAVE-172] + _ = x[XSAVEC-173] + _ = x[XSAVEOPT-174] + _ = x[XSAVES-175] + _ = x[AESARM-176] + _ = x[ARMCPUID-177] + _ = x[ASIMD-178] + _ = x[ASIMDDP-179] + _ = x[ASIMDHP-180] + _ = x[ASIMDRDM-181] + _ = x[ATOMICS-182] + _ = x[CRC32-183] + _ = x[DCPOP-184] + _ = x[EVTSTRM-185] + _ = x[FCMA-186] + _ = x[FP-187] + _ = x[FPHP-188] + _ = x[GPA-189] + _ = x[JSCVT-190] + _ = x[LRCPC-191] + _ = x[PMULL-192] + _ = x[SHA1-193] + _ = x[SHA2-194] + _ = x[SHA3-195] + _ = x[SHA512-196] + _ = x[SM3-197] + _ = x[SM4-198] + _ = x[SVE-199] + _ = x[lastID-200] _ = x[firstID-0] } -const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXSLOWBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPXCHG8CPBOOSTCX16ENQCMDERMSF16CFMA3FMA4FXSRFXSROPTGFNIHLEHTTHWAHYPERVISORIBPBIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKINT_WBINVDINVLPGBLAHFLZCNTMCAOVERFLOWMCOMMITMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMPXMSRIRCNXOSXSAVEPOPCNTRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSCESERIALIZESGXSGXLCSHASSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSUCCORTBMTSXLDTRKVAESVMXVPCLMULQDQWAITPKGWBNOINVDX87XOPXSAVEAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" +const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAVXAVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4INT_WBINVDINVLPGBLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID" -var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 58, 62, 72, 84, 92, 100, 108, 116, 123, 133, 143, 151, 161, 172, 180, 190, 208, 223, 230, 234, 238, 244, 249, 257, 262, 268, 272, 280, 287, 291, 297, 301, 305, 309, 313, 317, 324, 328, 331, 334, 337, 347, 351, 354, 364, 375, 381, 389, 400, 408, 420, 436, 446, 453, 457, 462, 473, 480, 483, 489, 494, 503, 510, 513, 519, 521, 528, 534, 539, 545, 551, 557, 560, 576, 579, 588, 591, 596, 599, 602, 606, 610, 614, 619, 624, 629, 634, 640, 643, 651, 655, 658, 668, 675, 683, 686, 689, 694, 700, 708, 713, 720, 727, 735, 742, 747, 752, 759, 763, 765, 769, 772, 777, 782, 787, 791, 795, 799, 805, 808, 811, 814, 820} +var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 65, 69, 79, 91, 99, 107, 115, 123, 130, 140, 150, 158, 168, 179, 187, 197, 215, 230, 237, 249, 256, 263, 274, 278, 282, 288, 293, 301, 306, 312, 316, 325, 343, 351, 358, 362, 366, 380, 386, 390, 394, 403, 407, 411, 416, 421, 425, 429, 436, 440, 443, 449, 452, 455, 465, 475, 488, 501, 505, 509, 523, 540, 543, 553, 564, 570, 578, 589, 597, 609, 625, 639, 650, 660, 675, 683, 693, 700, 704, 707, 714, 719, 730, 737, 744, 752, 755, 761, 766, 775, 782, 790, 794, 797, 803, 816, 821, 823, 830, 837, 843, 847, 856, 860, 865, 871, 877, 883, 886, 902, 911, 914, 923, 938, 951, 957, 971, 978, 981, 986, 989, 992, 1004, 1018, 1028, 1031, 1035, 1039, 1043, 1048, 1053, 1058, 1063, 1077, 1088, 1094, 1097, 1102, 1111, 1115, 1120, 1125, 1131, 1138, 1143, 1146, 1162, 1165, 1171, 1181, 1189, 1193, 1202, 1206, 1218, 1221, 1231, 1234, 1241, 1249, 1252, 1259, 1262, 1267, 1273, 1281, 1287, 1293, 1301, 1306, 1313, 1320, 1328, 1335, 1340, 1345, 1352, 1356, 1358, 1362, 1365, 1370, 1375, 1380, 1384, 1388, 1392, 1398, 1401, 1404, 1407, 1413} func (i FeatureID) String() string { if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) { diff --git a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go index 8d2cb036..84b1acd2 100644 --- a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go +++ b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go @@ -2,18 +2,120 @@ package cpuid -import "runtime" +import ( + "runtime" + "strings" + + "golang.org/x/sys/unix" +) func detectOS(c *CPUInfo) bool { + if runtime.GOOS != "ios" { + tryToFillCPUInfoFomSysctl(c) + } // There are no hw.optional sysctl values for the below features on Mac OS 11.0 // to detect their supported state dynamically. Assume the CPU features that // Apple Silicon M1 supports to be available as a minimal set of features // to all Go programs running on darwin/arm64. // TODO: Add more if we know them. c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2) - c.PhysicalCores = runtime.NumCPU() - // For now assuming 1 thread per core... - c.ThreadsPerCore = 1 - c.LogicalCores = c.PhysicalCores + return true } + +func sysctlGetBool(name string) bool { + value, err := unix.SysctlUint32(name) + if err != nil { + return false + } + return value != 0 +} + +func sysctlGetString(name string) string { + value, err := unix.Sysctl(name) + if err != nil { + return "" + } + return value +} + +func sysctlGetInt(unknown int, names ...string) int { + for _, name := range names { + value, err := unix.SysctlUint32(name) + if err != nil { + continue + } + if value != 0 { + return int(value) + } + } + return unknown +} + +func sysctlGetInt64(unknown int, names ...string) int { + for _, name := range names { + value64, err := unix.SysctlUint64(name) + if err != nil { + continue + } + if int(value64) != unknown { + return int(value64) + } + } + return unknown +} + +func setFeature(c *CPUInfo, name string, feature FeatureID) { + c.featureSet.setIf(sysctlGetBool(name), feature) +} +func tryToFillCPUInfoFomSysctl(c *CPUInfo) { + c.BrandName = sysctlGetString("machdep.cpu.brand_string") + + if len(c.BrandName) != 0 { + c.VendorString = strings.Fields(c.BrandName)[0] + } + + c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu") + c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") / + sysctlGetInt(1, "hw.physicalcpu") + c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count") + c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily") + c.Model = sysctlGetInt(0, "machdep.cpu.model") + c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize") + c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize") + c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize") + c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize") + c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize") + + // from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile + setFeature(c, "hw.optional.arm.FEAT_AES", AESARM) + setFeature(c, "hw.optional.AdvSIMD", ASIMD) + setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP) + setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM) + setFeature(c, "hw.optional.FEAT_CRC32", CRC32) + setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP) + // setFeature(c, "", EVTSTRM) + setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA) + setFeature(c, "hw.optional.arm.FEAT_FP", FP) + setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP) + setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA) + setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT) + setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC) + setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL) + setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1) + setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2) + setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3) + setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512) + // setFeature(c, "", SM3) + // setFeature(c, "", SM4) + setFeature(c, "hw.optional.arm.FEAT_SVE", SVE) + + // from empirical observation + setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP) + setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS) + setFeature(c, "hw.optional.floatingpoint", FP) + setFeature(c, "hw.optional.armv8_2_sha3", SHA3) + setFeature(c, "hw.optional.armv8_2_sha512", SHA512) + setFeature(c, "hw.optional.armv8_3_compnum", FCMA) + setFeature(c, "hw.optional.armv8_crc32", CRC32) +} -- cgit v1.2.3