diff options
Diffstat (limited to 'tools')
136 files changed, 5758 insertions, 1308 deletions
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd/ibs.h index cb1740bc3da2..300b6e0765b2 100644 --- a/tools/arch/x86/include/asm/amd-ibs.h +++ b/tools/arch/x86/include/asm/amd/ibs.h @@ -4,7 +4,7 @@ * 55898 Rev 0.35 - Feb 5, 2021 */ -#include "msr-index.h" +#include "../msr-index.h" /* IBS_OP_DATA2 DataSrc */ #define IBS_DATA_SRC_LOC_CACHE 2 diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 6c2c152d8a67..bc81b9d1aeca 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -476,11 +476,11 @@ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ -#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ -#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ -#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ -#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ +#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ +#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ /* * BUG word(s) @@ -519,7 +519,7 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ @@ -527,10 +527,10 @@ #define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ -#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ -#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ +#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ +#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h index 253690eb3c26..183aa662b165 100644 --- a/tools/arch/x86/include/asm/inat.h +++ b/tools/arch/x86/include/asm/inat.h @@ -82,6 +82,7 @@ #define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8)) #define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9)) #define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10)) +#define INAT_INV64 (1 << (INAT_FLAG_OFFS + 11)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -242,4 +243,9 @@ static inline int inat_evex_scalable(insn_attr_t attr) { return attr & INAT_EVEX_SCALABLE; } + +static inline int inat_is_invalid64(insn_attr_t attr) +{ + return attr & INAT_INV64; +} #endif diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv index d751eb8585d0..8d925ce9750f 100644 --- a/tools/arch/x86/kcpuid/cpuid.csv +++ b/tools/arch/x86/kcpuid/cpuid.csv @@ -1,5 +1,5 @@ # SPDX-License-Identifier: CC0-1.0 -# Generator: x86-cpuid-db v1.0 +# Generator: x86-cpuid-db v2.4 # # Auto-generated file. @@ -12,297 +12,298 @@ # Leaf 0H # Maximum standard leaf number + CPU vendor string - 0, 0, eax, 31:0, max_std_leaf , Highest cpuid standard leaf supported - 0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3 - 0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11 - 0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7 + 0x0, 0, eax, 31:0, max_std_leaf , Highest standard CPUID leaf supported + 0x0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3 + 0x0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11 + 0x0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7 # Leaf 1H # CPU FMS (Family/Model/Stepping) + standard feature flags - 1, 0, eax, 3:0, stepping , Stepping ID - 1, 0, eax, 7:4, base_model , Base CPU model ID - 1, 0, eax, 11:8, base_family_id , Base CPU family ID - 1, 0, eax, 13:12, cpu_type , CPU type - 1, 0, eax, 19:16, ext_model , Extended CPU model ID - 1, 0, eax, 27:20, ext_family , Extended CPU family ID - 1, 0, ebx, 7:0, brand_id , Brand index - 1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size - 1, 0, ebx, 23:16, n_logical_cpu , Logical CPU (HW threads) count - 1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID - 1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3) - 1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support - 1, 0, ecx, 2, dtes64 , 64-bit DS save area - 1, 0, ecx, 3, monitor , MONITOR/MWAIT support - 1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store - 1, 0, ecx, 5, vmx , Virtual Machine Extensions - 1, 0, ecx, 6, smx , Safer Mode Extensions - 1, 0, ecx, 7, est , Enhanced Intel SpeedStep - 1, 0, ecx, 8, tm2 , Thermal Monitor 2 - 1, 0, ecx, 9, ssse3 , Supplemental SSE3 - 1, 0, ecx, 10, cid , L1 Context ID - 1, 0, ecx, 11, sdbg , Sillicon Debug - 1, 0, ecx, 12, fma , FMA extensions using YMM state - 1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support - 1, 0, ecx, 14, xtpr , xTPR Update Control - 1, 0, ecx, 15, pdcm , Perfmon and Debug Capability - 1, 0, ecx, 17, pcid , Process-context identifiers - 1, 0, ecx, 18, dca , Direct Cache Access - 1, 0, ecx, 19, sse4_1 , SSE4.1 - 1, 0, ecx, 20, sse4_2 , SSE4.2 - 1, 0, ecx, 21, x2apic , X2APIC support - 1, 0, ecx, 22, movbe , MOVBE instruction support - 1, 0, ecx, 23, popcnt , POPCNT instruction support - 1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation - 1, 0, ecx, 25, aes , AES instructions - 1, 0, ecx, 26, xsave , XSAVE (and related instructions) support - 1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS - 1, 0, ecx, 28, avx , AVX instructions support - 1, 0, ecx, 29, f16c , Half-precision floating-point conversion support - 1, 0, ecx, 30, rdrand , RDRAND instruction support - 1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system - 1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87) - 1, 0, edx, 1, vme , Virtual-8086 Mode Extensions - 1, 0, edx, 2, de , Debugging Extensions - 1, 0, edx, 3, pse , Page Size Extension - 1, 0, edx, 4, tsc , Time Stamp Counter - 1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support) - 1, 0, edx, 6, pae , Physical Address Extensions - 1, 0, edx, 7, mce , Machine Check Exception - 1, 0, edx, 8, cx8 , CMPXCHG8B instruction - 1, 0, edx, 9, apic , APIC on-chip - 1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs - 1, 0, edx, 12, mtrr , Memory Type Range Registers - 1, 0, edx, 13, pge , Page Global Extensions - 1, 0, edx, 14, mca , Machine Check Architecture - 1, 0, edx, 15, cmov , Conditional Move Instruction - 1, 0, edx, 16, pat , Page Attribute Table - 1, 0, edx, 17, pse36 , Page Size Extension (36-bit) - 1, 0, edx, 18, pn , Processor Serial Number - 1, 0, edx, 19, clflush , CLFLUSH instruction - 1, 0, edx, 21, dts , Debug Store - 1, 0, edx, 22, acpi , Thermal monitor and clock control - 1, 0, edx, 23, mmx , MMX instructions - 1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions - 1, 0, edx, 25, sse , SSE instructions - 1, 0, edx, 26, sse2 , SSE2 instructions - 1, 0, edx, 27, ss , Self Snoop - 1, 0, edx, 28, ht , Hyper-threading - 1, 0, edx, 29, tm , Thermal Monitor - 1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now resreved - 1, 0, edx, 31, pbe , Pending Break Enable + 0x1, 0, eax, 3:0, stepping , Stepping ID + 0x1, 0, eax, 7:4, base_model , Base CPU model ID + 0x1, 0, eax, 11:8, base_family_id , Base CPU family ID + 0x1, 0, eax, 13:12, cpu_type , CPU type + 0x1, 0, eax, 19:16, ext_model , Extended CPU model ID + 0x1, 0, eax, 27:20, ext_family , Extended CPU family ID + 0x1, 0, ebx, 7:0, brand_id , Brand index + 0x1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size + 0x1, 0, ebx, 23:16, n_logical_cpu , Logical CPU count + 0x1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID + 0x1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3) + 0x1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support + 0x1, 0, ecx, 2, dtes64 , 64-bit DS save area + 0x1, 0, ecx, 3, monitor , MONITOR/MWAIT support + 0x1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store + 0x1, 0, ecx, 5, vmx , Virtual Machine Extensions + 0x1, 0, ecx, 6, smx , Safer Mode Extensions + 0x1, 0, ecx, 7, est , Enhanced Intel SpeedStep + 0x1, 0, ecx, 8, tm2 , Thermal Monitor 2 + 0x1, 0, ecx, 9, ssse3 , Supplemental SSE3 + 0x1, 0, ecx, 10, cid , L1 Context ID + 0x1, 0, ecx, 11, sdbg , Silicon Debug + 0x1, 0, ecx, 12, fma , FMA extensions using YMM state + 0x1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support + 0x1, 0, ecx, 14, xtpr , xTPR Update Control + 0x1, 0, ecx, 15, pdcm , Perfmon and Debug Capability + 0x1, 0, ecx, 17, pcid , Process-context identifiers + 0x1, 0, ecx, 18, dca , Direct Cache Access + 0x1, 0, ecx, 19, sse4_1 , SSE4.1 + 0x1, 0, ecx, 20, sse4_2 , SSE4.2 + 0x1, 0, ecx, 21, x2apic , X2APIC support + 0x1, 0, ecx, 22, movbe , MOVBE instruction support + 0x1, 0, ecx, 23, popcnt , POPCNT instruction support + 0x1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation + 0x1, 0, ecx, 25, aes , AES instructions + 0x1, 0, ecx, 26, xsave , XSAVE (and related instructions) support + 0x1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS + 0x1, 0, ecx, 28, avx , AVX instructions support + 0x1, 0, ecx, 29, f16c , Half-precision floating-point conversion support + 0x1, 0, ecx, 30, rdrand , RDRAND instruction support + 0x1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system + 0x1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87) + 0x1, 0, edx, 1, vme , Virtual-8086 Mode Extensions + 0x1, 0, edx, 2, de , Debugging Extensions + 0x1, 0, edx, 3, pse , Page Size Extension + 0x1, 0, edx, 4, tsc , Time Stamp Counter + 0x1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support) + 0x1, 0, edx, 6, pae , Physical Address Extensions + 0x1, 0, edx, 7, mce , Machine Check Exception + 0x1, 0, edx, 8, cx8 , CMPXCHG8B instruction + 0x1, 0, edx, 9, apic , APIC on-chip + 0x1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs + 0x1, 0, edx, 12, mtrr , Memory Type Range Registers + 0x1, 0, edx, 13, pge , Page Global Extensions + 0x1, 0, edx, 14, mca , Machine Check Architecture + 0x1, 0, edx, 15, cmov , Conditional Move Instruction + 0x1, 0, edx, 16, pat , Page Attribute Table + 0x1, 0, edx, 17, pse36 , Page Size Extension (36-bit) + 0x1, 0, edx, 18, pn , Processor Serial Number + 0x1, 0, edx, 19, clflush , CLFLUSH instruction + 0x1, 0, edx, 21, dts , Debug Store + 0x1, 0, edx, 22, acpi , Thermal monitor and clock control + 0x1, 0, edx, 23, mmx , MMX instructions + 0x1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions + 0x1, 0, edx, 25, sse , SSE instructions + 0x1, 0, edx, 26, sse2 , SSE2 instructions + 0x1, 0, edx, 27, ss , Self Snoop + 0x1, 0, edx, 28, ht , Hyper-threading + 0x1, 0, edx, 29, tm , Thermal Monitor + 0x1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now reserved + 0x1, 0, edx, 31, pbe , Pending Break Enable # Leaf 2H # Intel cache and TLB information one-byte descriptors - 2, 0, eax, 7:0, iteration_count , Number of times this CPUD leaf must be queried - 2, 0, eax, 15:8, desc1 , Descriptor #1 - 2, 0, eax, 23:16, desc2 , Descriptor #2 - 2, 0, eax, 30:24, desc3 , Descriptor #3 - 2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set - 2, 0, ebx, 7:0, desc4 , Descriptor #4 - 2, 0, ebx, 15:8, desc5 , Descriptor #5 - 2, 0, ebx, 23:16, desc6 , Descriptor #6 - 2, 0, ebx, 30:24, desc7 , Descriptor #7 - 2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set - 2, 0, ecx, 7:0, desc8 , Descriptor #8 - 2, 0, ecx, 15:8, desc9 , Descriptor #9 - 2, 0, ecx, 23:16, desc10 , Descriptor #10 - 2, 0, ecx, 30:24, desc11 , Descriptor #11 - 2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set - 2, 0, edx, 7:0, desc12 , Descriptor #12 - 2, 0, edx, 15:8, desc13 , Descriptor #13 - 2, 0, edx, 23:16, desc14 , Descriptor #14 - 2, 0, edx, 30:24, desc15 , Descriptor #15 - 2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set + 0x2, 0, eax, 7:0, iteration_count , Number of times this leaf must be queried + 0x2, 0, eax, 15:8, desc1 , Descriptor #1 + 0x2, 0, eax, 23:16, desc2 , Descriptor #2 + 0x2, 0, eax, 30:24, desc3 , Descriptor #3 + 0x2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set + 0x2, 0, ebx, 7:0, desc4 , Descriptor #4 + 0x2, 0, ebx, 15:8, desc5 , Descriptor #5 + 0x2, 0, ebx, 23:16, desc6 , Descriptor #6 + 0x2, 0, ebx, 30:24, desc7 , Descriptor #7 + 0x2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set + 0x2, 0, ecx, 7:0, desc8 , Descriptor #8 + 0x2, 0, ecx, 15:8, desc9 , Descriptor #9 + 0x2, 0, ecx, 23:16, desc10 , Descriptor #10 + 0x2, 0, ecx, 30:24, desc11 , Descriptor #11 + 0x2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set + 0x2, 0, edx, 7:0, desc12 , Descriptor #12 + 0x2, 0, edx, 15:8, desc13 , Descriptor #13 + 0x2, 0, edx, 23:16, desc14 , Descriptor #14 + 0x2, 0, edx, 30:24, desc15 , Descriptor #15 + 0x2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set # Leaf 4H # Intel deterministic cache parameters - 4, 31:0, eax, 4:0, cache_type , Cache type field - 4, 31:0, eax, 7:5, cache_level , Cache level (1-based) - 4, 31:0, eax, 8, cache_self_init , Self-initialializing cache level - 4, 31:0, eax, 9, fully_associative , Fully-associative cache - 4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache - 4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package - 4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based) - 4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based) - 4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based) - 4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based) - 4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches - 4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches - 4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function) + 0x4, 31:0, eax, 4:0, cache_type , Cache type field + 0x4, 31:0, eax, 7:5, cache_level , Cache level (1-based) + 0x4, 31:0, eax, 8, cache_self_init , Self-initializing cache level + 0x4, 31:0, eax, 9, fully_associative , Fully-associative cache + 0x4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache + 0x4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package + 0x4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based) + 0x4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based) + 0x4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based) + 0x4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based) + 0x4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches + 0x4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches + 0x4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function) # Leaf 5H # MONITOR/MWAIT instructions enumeration - 5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes - 5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes - 5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported - 5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported - 5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT - 5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT - 5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT - 5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT - 5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT - 5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT - 5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT - 5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT + 0x5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes + 0x5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes + 0x5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported + 0x5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported + 0x5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT + 0x5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT + 0x5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT + 0x5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT + 0x5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT + 0x5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT + 0x5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT + 0x5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT # Leaf 6H # Thermal and Power Management enumeration - 6, 0, eax, 0, dtherm , Digital temprature sensor - 6, 0, eax, 1, turbo_boost , Intel Turbo Boost - 6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state) - 6, 0, eax, 4, pln , Power Limit Notification (PLN) event - 6, 0, eax, 5, ecmd , Clock modulation duty cycle extension - 6, 0, eax, 6, pts , Package thermal management - 6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported - 6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR) - 6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported - 6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference - 6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request - 6, 0, eax, 13, hdc_base_regs , HDC base registers are supported - 6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0 - 6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change - 6, 0, eax, 16, hwp_peci_override , HWP PECI override - 6, 0, eax, 17, hwp_flexible , Flexible HWP - 6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode - 6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported - 6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported - 6, 0, eax, 23, thread_director , Intel thread director support - 6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported - 6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds - 6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface) - 6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support - 6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director - 6, 0, edx, 0, perfcap_reporting , Performance capability reporting - 6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting - 6, 0, edx, 11:8, feedback_sz , HW feedback interface struct size, in 4K pages - 6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU index @ HW feedback struct, 0-based + 0x6, 0, eax, 0, dtherm , Digital temperature sensor + 0x6, 0, eax, 1, turbo_boost , Intel Turbo Boost + 0x6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state) + 0x6, 0, eax, 4, pln , Power Limit Notification (PLN) event + 0x6, 0, eax, 5, ecmd , Clock modulation duty cycle extension + 0x6, 0, eax, 6, pts , Package thermal management + 0x6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported + 0x6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR) + 0x6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported + 0x6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference + 0x6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request + 0x6, 0, eax, 13, hdc_base_regs , HDC base registers are supported + 0x6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0 + 0x6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change + 0x6, 0, eax, 16, hwp_peci_override , HWP PECI override + 0x6, 0, eax, 17, hwp_flexible , Flexible HWP + 0x6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode + 0x6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported + 0x6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported + 0x6, 0, eax, 23, thread_director , Intel thread director support + 0x6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported + 0x6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds + 0x6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface) + 0x6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support + 0x6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director + 0x6, 0, edx, 0, perfcap_reporting , Performance capability reporting + 0x6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting + 0x6, 0, edx, 11:8, feedback_sz , Feedback interface structure size, in 4K pages + 0x6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU hardware feedback interface index # Leaf 7H # Extended CPU features enumeration - 7, 0, eax, 31:0, leaf7_n_subleaves , Number of cpuid 0x7 subleaves - 7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support - 7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported - 7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions) - 7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1 - 7, 0, ebx, 4, hle , Hardware Lock Elision - 7, 0, ebx, 5, avx2 , AVX2 instruction set - 7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions - 7, 0, ebx, 7, smep , Supervisor Mode Execution Protection - 7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2 - 7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB - 7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID) - 7, 0, ebx, 11, rtm , Intel restricted transactional memory - 7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring - 7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero) - 7, 0, ebx, 14, mpx , Intel memory protection extensions - 7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcemeent - 7, 0, ebx, 16, avx512f , AVX-512 foundation instructions - 7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions - 7, 0, ebx, 18, rdseed , RDSEED instruction - 7, 0, ebx, 19, adx , ADCX/ADOX instructions - 7, 0, ebx, 20, smap , Supervisor mode access prevention - 7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add - 7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction - 7, 0, ebx, 24, clwb , CLWB instruction - 7, 0, ebx, 25, intel_pt , Intel processor trace - 7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions - 7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instrs - 7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instrs - 7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions - 7, 0, ebx, 30, avx512bw , AVX-512 BW (byte/word granular) instructions - 7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions - 7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only) - 7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instrs - 7, 0, ecx, 2, umip , User mode instruction protection - 7, 0, ecx, 3, pku , Protection keys for user-space - 7, 0, ecx, 4, ospke , OS protection keys enable - 7, 0, ecx, 5, waitpkg , WAITPKG instructions - 7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instrs group 2 - 7, 0, ecx, 7, cet_ss , CET shadow stack features - 7, 0, ecx, 8, gfni , Galois field new instructions - 7, 0, ecx, 9, vaes , Vector AES instrs - 7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support - 7, 0, ecx, 11, avx512_vnni , Vector neural network instructions - 7, 0, ecx, 12, avx512_bitalg , AVX-512 bit count/shiffle - 7, 0, ecx, 13, tme , Intel total memory encryption - 7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DW/QW - 7, 0, ecx, 16, la57 , 57-bit linear addreses (five-level paging) - 7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode - 7, 0, ecx, 22, rdpid , RDPID instruction - 7, 0, ecx, 23, key_locker , Intel key locker support - 7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection - 7, 0, ecx, 25, cldemote , CLDEMOTE instruction - 7, 0, ecx, 27, movdiri , MOVDIRI instruction - 7, 0, ecx, 28, movdir64b , MOVDIR64B instruction - 7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S}) - 7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration - 7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages - 7, 0, edx, 1, sgx_keys , Intel SGX attestation services - 7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions - 7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision - 7, 0, edx, 4, fsrm , Fast short REP MOV - 7, 0, edx, 5, uintr , CPU supports user interrupts - 7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions - 7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available - 7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support - 7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts - 7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported - 7, 0, edx, 14, serialize , SERIALIZE instruction - 7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part' - 7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking - 7, 0, edx, 18, pconfig , PCONFIG instruction - 7, 0, edx, 19, arch_lbr , Intel architectural LBRs - 7, 0, edx, 20, ibt , CET indirect branch tracking - 7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support - 7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions - 7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support - 7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support - 7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions) - 7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors - 7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR - 7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR - 7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR - 7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable - 7, 1, eax, 4, avx_vnni , AVX-VNNI instructions - 7, 1, eax, 5, avx512_bf16 , AVX-512 bFloat16 instructions - 7, 1, eax, 6, lass , Linear address space separation - 7, 1, eax, 7, cmpccxadd , CMPccXADD instructions - 7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: CPUID leaf 0x23 is supported - 7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB - 7, 1, eax, 11, fsrs , Fast short REP STOSB - 7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB - 7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions - 7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS - 7, 1, eax, 19, wrmsrns , WRMSRNS instr (WRMSR-non-serializing) - 7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations - 7, 1, eax, 22, hreset , History reset support - 7, 1, eax, 23, avx_ifma , Integer fused multiply add - 7, 1, eax, 26, lam , Linear address masking - 7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions - 7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs) - 7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions - 7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions - 7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids) - 7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions - 7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use - 7, 2, edx, 0, intel_psfd , Intel predictive store forward disable - 7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S} - 7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S} - 7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U - 7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S - 7, 2, edx, 5, mcdt_no , MCDT mitigation not needed - 7, 2, edx, 6, uclock_disable , UC-lock disable is supported + 0x7, 0, eax, 31:0, leaf7_n_subleaves , Number of leaf 0x7 subleaves + 0x7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support + 0x7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported + 0x7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions) + 0x7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1 + 0x7, 0, ebx, 4, hle , Hardware Lock Elision + 0x7, 0, ebx, 5, avx2 , AVX2 instruction set + 0x7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions + 0x7, 0, ebx, 7, smep , Supervisor Mode Execution Protection + 0x7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2 + 0x7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB + 0x7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID) + 0x7, 0, ebx, 11, rtm , Intel restricted transactional memory + 0x7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring + 0x7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero) + 0x7, 0, ebx, 14, mpx , Intel memory protection extensions + 0x7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcement + 0x7, 0, ebx, 16, avx512f , AVX-512 foundation instructions + 0x7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions + 0x7, 0, ebx, 18, rdseed , RDSEED instruction + 0x7, 0, ebx, 19, adx , ADCX/ADOX instructions + 0x7, 0, ebx, 20, smap , Supervisor mode access prevention + 0x7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add + 0x7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction + 0x7, 0, ebx, 24, clwb , CLWB instruction + 0x7, 0, ebx, 25, intel_pt , Intel processor trace + 0x7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions + 0x7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instructions + 0x7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instructions + 0x7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions + 0x7, 0, ebx, 30, avx512bw , AVX-512 byte/word instructions + 0x7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions + 0x7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only) + 0x7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instructions + 0x7, 0, ecx, 2, umip , User mode instruction protection + 0x7, 0, ecx, 3, pku , Protection keys for user-space + 0x7, 0, ecx, 4, ospke , OS protection keys enable + 0x7, 0, ecx, 5, waitpkg , WAITPKG instructions + 0x7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instructions group 2 + 0x7, 0, ecx, 7, cet_ss , CET shadow stack features + 0x7, 0, ecx, 8, gfni , Galois field new instructions + 0x7, 0, ecx, 9, vaes , Vector AES instructions + 0x7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support + 0x7, 0, ecx, 11, avx512_vnni , Vector neural network instructions + 0x7, 0, ecx, 12, avx512_bitalg , AVX-512 bitwise algorithms + 0x7, 0, ecx, 13, tme , Intel total memory encryption + 0x7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DWORD/QWORD + 0x7, 0, ecx, 16, la57 , 57-bit linear addresses (five-level paging) + 0x7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode + 0x7, 0, ecx, 22, rdpid , RDPID instruction + 0x7, 0, ecx, 23, key_locker , Intel key locker support + 0x7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection + 0x7, 0, ecx, 25, cldemote , CLDEMOTE instruction + 0x7, 0, ecx, 27, movdiri , MOVDIRI instruction + 0x7, 0, ecx, 28, movdir64b , MOVDIR64B instruction + 0x7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S}) + 0x7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration + 0x7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages + 0x7, 0, edx, 1, sgx_keys , Intel SGX attestation services + 0x7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions + 0x7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision + 0x7, 0, edx, 4, fsrm , Fast short REP MOV + 0x7, 0, edx, 5, uintr , CPU supports user interrupts + 0x7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions + 0x7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available + 0x7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support + 0x7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts + 0x7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported + 0x7, 0, edx, 14, serialize , SERIALIZE instruction + 0x7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part' + 0x7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking + 0x7, 0, edx, 18, pconfig , PCONFIG instruction + 0x7, 0, edx, 19, arch_lbr , Intel architectural LBRs + 0x7, 0, edx, 20, ibt , CET indirect branch tracking + 0x7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support + 0x7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions + 0x7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support + 0x7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support + 0x7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions) + 0x7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors + 0x7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR + 0x7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR + 0x7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR + 0x7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable + 0x7, 1, eax, 4, avx_vnni , AVX-VNNI instructions + 0x7, 1, eax, 5, avx512_bf16 , AVX-512 bfloat16 instructions + 0x7, 1, eax, 6, lass , Linear address space separation + 0x7, 1, eax, 7, cmpccxadd , CMPccXADD instructions + 0x7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: leaf 0x23 is supported + 0x7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB + 0x7, 1, eax, 11, fsrs , Fast short REP STOSB + 0x7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB + 0x7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions + 0x7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS + 0x7, 1, eax, 19, wrmsrns , WRMSRNS instruction (WRMSR-non-serializing) + 0x7, 1, eax, 20, nmi_src , NMI-source reporting with FRED event data + 0x7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations + 0x7, 1, eax, 22, hreset , History reset support + 0x7, 1, eax, 23, avx_ifma , Integer fused multiply add + 0x7, 1, eax, 26, lam , Linear address masking + 0x7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions + 0x7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs) + 0x7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions + 0x7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions + 0x7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids) + 0x7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions + 0x7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use + 0x7, 2, edx, 0, intel_psfd , Intel predictive store forward disable + 0x7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S} + 0x7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S} + 0x7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U + 0x7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S + 0x7, 2, edx, 5, mcdt_no , MCDT mitigation not needed + 0x7, 2, edx, 6, uclock_disable , UC-lock disable is supported # Leaf 9H # Intel DCA (Direct Cache Access) enumeration - 9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS + 0x9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS # Leaf AH # Intel PMU (Performance Monitoring Unit) enumeration @@ -310,7 +311,7 @@ 0xa, 0, eax, 7:0, pmu_version , Performance monitoring unit version ID 0xa, 0, eax, 15:8, pmu_n_gcounters , Number of general PMU counters per logical CPU 0xa, 0, eax, 23:16, pmu_gcounters_nbits , Bitwidth of PMU general counters - 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of cpuid leaf 0xa EBX bit vector + 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of leaf 0xa EBX bit vector 0xa, 0, ebx, 0, no_core_cycle_evt , Core cycle event not available 0xa, 0, ebx, 1, no_insn_retired_evt , Instruction retired event not available 0xa, 0, ebx, 2, no_refcycle_evt , Reference cycles event not available @@ -339,18 +340,18 @@ 0xd, 0, eax, 0, xcr0_x87 , XCR0.X87 (bit 0) supported 0xd, 0, eax, 1, xcr0_sse , XCR0.SEE (bit 1) supported 0xd, 0, eax, 2, xcr0_avx , XCR0.AVX (bit 2) supported - 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 regs) - 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs) - 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 regs) - 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs) - 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs) - 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU reg) - 0xd, 0, eax, 11, xcr0_cet_u , AMD XCR0.CET_U (bit 11) supported (CET supervisor state) - 0xd, 0, eax, 12, xcr0_cet_s , AMD XCR0.CET_S (bit 12) support (CET user state) + 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 registers) + 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers) + 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 registers) + 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers) + 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers) + 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU registers) + 0xd, 0, eax, 11, xcr0_cet_u , XCR0.CET_U (bit 11) supported (CET user state) + 0xd, 0, eax, 12, xcr0_cet_s , XCR0.CET_S (bit 12) supported (CET supervisor state) 0xd, 0, eax, 17, xcr0_tileconfig , XCR0.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG) 0xd, 0, eax, 18, xcr0_tiledata , XCR0.TILEDATA (bit 18) supported (AMX can manage TILEDATA) - 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTR area byte size, for XCR0 enabled features - 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTR area max byte size, all CPU features + 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTOR area byte size, for XCR0 enabled features + 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTOR area max byte size, all CPU features 0xd, 0, edx, 30, xcr0_lwp , AMD XCR0.LWP (bit 62) supported (Light-weight Profiling) 0xd, 1, eax, 0, xsaveopt , XSAVEOPT instruction 0xd, 1, eax, 1, xsavec , XSAVEC instruction @@ -369,7 +370,7 @@ 0xd, 63:2, eax, 31:0, xsave_sz , Size of save area for subleaf-N feature, in bytes 0xd, 63:2, ebx, 31:0, xsave_offset , Offset of save area for subleaf-N feature, in bytes 0xd, 63:2, ecx, 0, is_xss_bit , Subleaf N describes an XSS bit, otherwise XCR0 bit - 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature xsave area is 64-byte aligned + 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature XSAVE area is 64-byte aligned # Leaf FH # Intel RDT / AMD PQoS resource monitoring @@ -426,17 +427,17 @@ 0x12, 1, ecx, 0, xfrm_x87 , Enclave XFRM.X87 (bit 0) supported 0x12, 1, ecx, 1, xfrm_sse , Enclave XFRM.SEE (bit 1) supported 0x12, 1, ecx, 2, xfrm_avx , Enclave XFRM.AVX (bit 2) supported - 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 regs) - 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs) - 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 regs) - 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs) - 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs) - 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU reg) + 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 registers) + 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers) + 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 registers) + 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers) + 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers) + 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU registers) 0x12, 1, ecx, 17, xfrm_tileconfig , Enclave XFRM.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG) 0x12, 1, ecx, 18, xfrm_tiledata , Enclave XFRM.TILEDATA (bit 18) supported (AMX can manage TILEDATA) 0x12, 31:2, eax, 3:0, subleaf_type , Subleaf type (dictates output layout) - 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base addr, bits[12:31] - 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base addr, bits[32:51] + 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base address, bits[12:31] + 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base address, bits[32:51] 0x12, 31:2, ecx, 3:0, epc_sec_type , EPC section type / property encoding 0x12, 31:2, ecx, 31:12, epc_sec_size_0 , EPC section size, bits[12:31] 0x12, 31:2, edx, 19:0, epc_sec_size_1 , EPC section size, bits[32:51] @@ -444,7 +445,7 @@ # Leaf 14H # Intel Processor Trace enumeration - 0x14, 0, eax, 31:0, pt_max_subleaf , Max cpuid 0x14 subleaf + 0x14, 0, eax, 31:0, pt_max_subleaf , Maximum leaf 0x14 subleaf 0x14, 0, ebx, 0, cr3_filtering , IA32_RTIT_CR3_MATCH is accessible 0x14, 0, ebx, 1, psb_cyc , Configurable PSB and cycle-accurate mode 0x14, 0, ebx, 2, ip_filtering , IP/TraceStop filtering; Warm-reset PT MSRs preservation @@ -472,7 +473,7 @@ 0x15, 0, ecx, 31:0, cpu_crystal_hz , Core crystal clock nominal frequency, in Hz # Leaf 16H -# Intel processor fequency enumeration +# Intel processor frequency enumeration 0x16, 0, eax, 15:0, cpu_base_mhz , Processor base frequency, in MHz 0x16, 0, ebx, 15:0, cpu_max_mhz , Processor max frequency, in MHz @@ -481,9 +482,9 @@ # Leaf 17H # Intel SoC vendor attributes enumeration - 0x17, 0, eax, 31:0, soc_max_subleaf , Max cpuid leaf 0x17 subleaf + 0x17, 0, eax, 31:0, soc_max_subleaf , Maximum leaf 0x17 subleaf 0x17, 0, ebx, 15:0, soc_vendor_id , SoC vendor ID - 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumaeratoion scheme (not Intel) + 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumeration scheme (not Intel) 0x17, 0, ecx, 31:0, soc_proj_id , SoC project ID, assigned by vendor 0x17, 0, edx, 31:0, soc_stepping_id , Soc project stepping ID, assigned by vendor 0x17, 3:1, eax, 31:0, vendor_brand_a , Vendor Brand ID string, bytes subleaf_nr * (0 -> 3) @@ -494,18 +495,18 @@ # Leaf 18H # Intel determenestic address translation (TLB) parameters - 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Max cpuid 0x18 subleaf + 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Maximum leaf 0x18 subleaf 0x18, 31:0, ebx, 0, tlb_4k_page , TLB 4KB-page entries supported 0x18, 31:0, ebx, 1, tlb_2m_page , TLB 2MB-page entries supported 0x18, 31:0, ebx, 2, tlb_4m_page , TLB 4MB-page entries supported 0x18, 31:0, ebx, 3, tlb_1g_page , TLB 1GB-page entries supported - 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this struct + 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this structure 0x18, 31:0, ebx, 31:16, n_way_associative , Ways of associativity 0x18, 31:0, ecx, 31:0, n_sets , Number of sets 0x18, 31:0, edx, 4:0, tlb_type , Translation cache type (TLB type) 0x18, 31:0, edx, 7:5, tlb_cache_level , Translation cache level (1-based) 0x18, 31:0, edx, 8, is_fully_associative , Fully-associative structure - 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max num of addressible IDs for logical CPUs sharing this TLB - 1 + 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max number of addressable IDs for logical CPUs sharing this TLB - 1 # Leaf 19H # Intel Key Locker enumeration @@ -568,7 +569,7 @@ # Intel AMX, TMUL (Tile-matrix MULtiply) accelerator unit enumeration 0x1e, 0, ebx, 7:0, tmul_maxk , TMUL unit maximum height, K (rows or columns) - 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maxiumum SIMD dimension, N (column bytes) + 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maximum SIMD dimension, N (column bytes) # Leaf 1FH # Intel extended topology enumeration v2 @@ -623,9 +624,9 @@ 0x40000000, 0, edx, 31:0, hypervisor_id_2 , Hypervisor ID string bytes 8 - 11 # Leaf 80000000H -# Maximum extended leaf number + CPU vendor string (AMD) +# Maximum extended leaf number + AMD/Transmeta CPU vendor string -0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended cpuid leaf supported +0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended CPUID leaf supported 0x80000000, 0, ebx, 31:0, cpu_vendorid_0 , Vendor ID string bytes 0 - 3 0x80000000, 0, ecx, 31:0, cpu_vendorid_2 , Vendor ID string bytes 8 - 11 0x80000000, 0, edx, 31:0, cpu_vendorid_1 , Vendor ID string bytes 4 - 7 @@ -636,6 +637,7 @@ 0x80000001, 0, eax, 3:0, e_stepping_id , Stepping ID 0x80000001, 0, eax, 7:4, e_base_model , Base processor model 0x80000001, 0, eax, 11:8, e_base_family , Base processor family +0x80000001, 0, eax, 13:12, e_base_type , Base processor type (Transmeta) 0x80000001, 0, eax, 19:16, e_ext_model , Extended processor model 0x80000001, 0, eax, 27:20, e_ext_family , Extended processor family 0x80000001, 0, ebx, 15:0, brand_id , Brand ID @@ -659,7 +661,7 @@ 0x80000001, 0, ecx, 17, tce , Translation cache extension 0x80000001, 0, ecx, 19, nodeid_msr , NodeId MSR (0xc001100c) 0x80000001, 0, ecx, 21, tbm , Trailing bit manipulations -0x80000001, 0, ecx, 22, topoext , Topology Extensions (cpuid leaf 0x8000001d) +0x80000001, 0, ecx, 22, topoext , Topology Extensions (leaf 0x8000001d) 0x80000001, 0, ecx, 23, perfctr_core , Core performance counter extensions 0x80000001, 0, ecx, 24, perfctr_nb , NB/DF performance counter extensions 0x80000001, 0, ecx, 26, bpext , Data access breakpoint extension @@ -687,6 +689,7 @@ 0x80000001, 0, edx, 19, mp , Out-of-spec AMD Multiprocessing bit 0x80000001, 0, edx, 20, nx , No-execute page protection 0x80000001, 0, edx, 22, mmxext , AMD MMX extensions +0x80000001, 0, edx, 23, e_mmx , MMX instructions 0x80000001, 0, edx, 24, e_fxsr , FXSAVE and FXRSTOR instructions 0x80000001, 0, edx, 25, fxsr_opt , FXSAVE and FXRSTOR optimizations 0x80000001, 0, edx, 26, pdpe1gb , 1-GB large page support @@ -720,11 +723,11 @@ 0x80000004, 0, edx, 31:0, cpu_brandid_11 , CPU brand ID string, bytes 44 - 47 # Leaf 80000005H -# AMD L1 cache and L1 TLB enumeration +# AMD/Transmeta L1 cache and L1 TLB enumeration -0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entires, 2M and 4M pages +0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entries, 2M and 4M pages 0x80000005, 0, eax, 15:8, l1_itlb_2m_4m_assoc , L1 ITLB associativity, 2M and 4M pages -0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entires, 2M and 4M pages +0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entries, 2M and 4M pages 0x80000005, 0, eax, 31:24, l1_dtlb_2m_4m_assoc , L1 DTLB associativity, 2M and 4M pages 0x80000005, 0, ebx, 7:0, l1_itlb_4k_nentries , L1 ITLB #entries, 4K pages 0x80000005, 0, ebx, 15:8, l1_itlb_4k_assoc , L1 ITLB associativity, 4K pages @@ -763,11 +766,11 @@ # CPU power management (mostly AMD) and AMD RAS enumeration 0x80000007, 0, ebx, 0, overflow_recov , MCA overflow conditions not fatal -0x80000007, 0, ebx, 1, succor , Software containment of UnCORRectable errors +0x80000007, 0, ebx, 1, succor , Software containment of uncorrectable errors 0x80000007, 0, ebx, 2, hw_assert , Hardware assert MSRs 0x80000007, 0, ebx, 3, smca , Scalable MCA (MCAX MSRs) 0x80000007, 0, ecx, 31:0, cpu_pwr_sample_ratio , CPU power sample time ratio -0x80000007, 0, edx, 0, digital_temp , Digital temprature sensor +0x80000007, 0, edx, 0, digital_temp , Digital temperature sensor 0x80000007, 0, edx, 1, powernow_freq_id , PowerNOW! frequency scaling 0x80000007, 0, edx, 2, powernow_volt_id , PowerNOW! voltage scaling 0x80000007, 0, edx, 3, thermal_trip , THERMTRIP (Thermal Trip) @@ -810,7 +813,7 @@ 0x80000008, 0, ebx, 23, amd_ppin , Protected Processor Inventory Number 0x80000008, 0, ebx, 24, amd_ssbd , Speculative Store Bypass Disable 0x80000008, 0, ebx, 25, virt_ssbd , virtualized SSBD (Speculative Store Bypass Disable) -0x80000008, 0, ebx, 26, amd_ssb_no , SSBD not needed (fixed in HW) +0x80000008, 0, ebx, 26, amd_ssb_no , SSBD is not needed (fixed in hardware) 0x80000008, 0, ebx, 27, cppc , Collaborative Processor Performance Control 0x80000008, 0, ebx, 28, amd_psfd , Predictive Store Forward Disable 0x80000008, 0, ebx, 29, btc_no , CPU not affected by Branch Type Confusion @@ -838,7 +841,7 @@ 0x8000000a, 0, edx, 10, pausefilter , Pause intercept filter 0x8000000a, 0, edx, 12, pfthreshold , Pause filter threshold 0x8000000a, 0, edx, 13, avic , Advanced virtual interrupt controller -0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virt) +0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virtualization) 0x8000000a, 0, edx, 16, vgif , Virtualize the Global Interrupt Flag 0x8000000a, 0, edx, 17, gmet , Guest mode execution trap 0x8000000a, 0, edx, 18, x2avic , Virtual x2APIC @@ -850,7 +853,7 @@ 0x8000000a, 0, edx, 25, vnmi , NMI virtualization 0x8000000a, 0, edx, 26, ibs_virt , IBS Virtualization 0x8000000a, 0, edx, 27, ext_lvt_off_chg , Extended LVT offset fault change -0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME addr check +0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME address check # Leaf 80000019H # AMD TLB 1G-pages enumeration @@ -891,20 +894,20 @@ # AMD LWP (Lightweight Profiling) 0x8000001c, 0, eax, 0, os_lwp_avail , LWP is available to application programs (supported by OS) -0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction (EventId=1) is supported by OS -0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event (EventId=2) is supported by OS -0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event (EventId=3) is supported by OS -0x8000001c, 0, eax, 4, os_lwp_dme , DCache Miss Event (EventId=4) is supported by OS -0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is supported by OS -0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is supported by OS +0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction is supported by OS +0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event is supported by OS +0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event is supported by OS +0x8000001c, 0, eax, 4, os_lwp_dme , Dcache Miss Event is supported by OS +0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event is supported by OS +0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event is supported by OS 0x8000001c, 0, eax, 29, os_lwp_cont , LWP sampling in continuous mode is supported by OS 0x8000001c, 0, eax, 30, os_lwp_ptsc , Performance Time Stamp Counter in event records is supported by OS 0x8000001c, 0, eax, 31, os_lwp_int , Interrupt on threshold overflow is supported by OS 0x8000001c, 0, ebx, 7:0, lwp_lwpcb_sz , LWP Control Block size, in quadwords 0x8000001c, 0, ebx, 15:8, lwp_event_sz , LWP event record size, in bytes -0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventId value (EventID 255 not included) +0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventID value (EventID 255 not included) 0x8000001c, 0, ebx, 31:24, lwp_event_offset , LWP events area offset in the LWP Control Block -0x8000001c, 0, ecx, 4:0, lwp_latency_max , Num of bits in cache latency counters (10 to 31) +0x8000001c, 0, ecx, 4:0, lwp_latency_max , Number of bits in cache latency counters (10 to 31) 0x8000001c, 0, ecx, 5, lwp_data_adddr , Cache miss events report the data address of the reference 0x8000001c, 0, ecx, 8:6, lwp_latency_rnd , Amount by which cache latency is rounded 0x8000001c, 0, ecx, 15:9, lwp_version , LWP implementation version @@ -913,16 +916,16 @@ 0x8000001c, 0, ecx, 29, lwp_ip_filtering , IP filtering (IPI, IPF, BaseIP, and LimitIP @ LWPCP) supported 0x8000001c, 0, ecx, 30, lwp_cache_levels , Cache-related events can be filtered by cache level 0x8000001c, 0, ecx, 31, lwp_cache_latency , Cache-related events can be filtered by latency -0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in Hardware -0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction (EventId=1) is available in HW -0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event (EventId=2) is available in HW -0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event (EventId=3) is available in HW -0x8000001c, 0, edx, 4, hw_lwp_dme , DCache Miss Event (EventId=4) is available in HW -0x8000001c, 0, edx, 5, hw_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is available in HW -0x8000001c, 0, edx, 6, hw_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is available in HW -0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in HW -0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in HW -0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in HW +0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in hardware +0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction is available in hardware +0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event is available in hardware +0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event is available in hardware +0x8000001c, 0, edx, 4, hw_lwp_dme , Dcache Miss Event is available in hardware +0x8000001c, 0, edx, 5, hw_lwp_cnh , Clocks Not Halted event is available in hardware +0x8000001c, 0, edx, 6, hw_lwp_rnh , Reference clocks Not Halted event is available in hardware +0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in hardware +0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in hardware +0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in hardware # Leaf 8000001DH # AMD deterministic cache parameters @@ -958,10 +961,10 @@ 0x8000001f, 0, eax, 4, sev_nested_paging , SEV secure nested paging supported 0x8000001f, 0, eax, 5, vm_permission_levels , VMPL supported 0x8000001f, 0, eax, 6, rpmquery , RPMQUERY instruction supported -0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadwo stack supported +0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadow stack supported 0x8000001f, 0, eax, 8, secure_tsc , Secure TSC supported 0x8000001f, 0, eax, 9, v_tsc_aux , Hardware virtualizes TSC_AUX -0x8000001f, 0, eax, 10, sme_coherent , HW enforces cache coherency across encryption domains +0x8000001f, 0, eax, 10, sme_coherent , Cache coherency is enforced across encryption domains 0x8000001f, 0, eax, 11, req_64bit_hypervisor , SEV guest mandates 64-bit hypervisor 0x8000001f, 0, eax, 12, restricted_injection , Restricted Injection supported 0x8000001f, 0, eax, 13, alternate_injection , Alternate Injection supported @@ -973,13 +976,13 @@ 0x8000001f, 0, eax, 19, virt_ibs , IBS state virtualization is supported for SEV-ES guests 0x8000001f, 0, eax, 24, vmsa_reg_protection , VMSA register protection is supported 0x8000001f, 0, eax, 25, smt_protection , SMT protection is supported -0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000h) is supported +0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000) is supported 0x8000001f, 0, eax, 29, nested_virt_snp_msr , VIRT_RMPUPDATE/VIRT_PSMASH MSRs are supported 0x8000001f, 0, ebx, 5:0, pte_cbit_pos , PTE bit number used to enable memory encryption 0x8000001f, 0, ebx, 11:6, phys_addr_reduction_nbits, Reduction of phys address space when encryption is enabled, in bits 0x8000001f, 0, ebx, 15:12, vmpl_count , Number of VM permission levels (VMPL) supported 0x8000001f, 0, ecx, 31:0, enc_guests_max , Max supported number of simultaneous encrypted guests -0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Mininum ASID for SEV-enabled SEV-ES-disabled guest +0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Minimum ASID for SEV-enabled SEV-ES-disabled guest # Leaf 80000020H # AMD Platform QoS extended feature IDs @@ -988,6 +991,8 @@ 0x80000020, 0, ebx, 2, smba , Slow Memory Bandwidth Allocation support 0x80000020, 0, ebx, 3, bmec , Bandwidth Monitoring Event Configuration support 0x80000020, 0, ebx, 4, l3rr , L3 Range Reservation support +0x80000020, 0, ebx, 5, abmc , Assignable Bandwidth Monitoring Counters +0x80000020, 0, ebx, 6, sdciae , Smart Data Cache Injection (SDCI) Allocation Enforcement 0x80000020, 1, eax, 31:0, mba_limit_len , MBA enforcement limit size 0x80000020, 1, edx, 31:0, mba_cos_max , MBA max Class of Service number (zero-based) 0x80000020, 2, eax, 31:0, smba_limit_len , SMBA enforcement limit size @@ -1007,17 +1012,26 @@ 0x80000021, 0, eax, 0, no_nested_data_bp , No nested data breakpoints 0x80000021, 0, eax, 1, fsgs_non_serializing , WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing 0x80000021, 0, eax, 2, lfence_rdtsc , LFENCE always serializing / synchronizes RDTSC -0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock is supported +0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock 0x80000021, 0, eax, 6, null_sel_clr_base , Null selector clears base -0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore Enable bit supported -0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS enable bit supported -0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not present -0x80000021, 0, eax, 10, fsrs_supported , Fast Short Rep Stosb (FSRS) is supported -0x80000021, 0, eax, 11, fsrc_supported , Fast Short Repe Cmpsb (FSRC) is supported -0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is supported +0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore +0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS +0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not available +0x80000021, 0, eax, 10, fsrs , Fast Short Rep STOSB +0x80000021, 0, eax, 11, fsrc , Fast Short Rep CMPSB +0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is available +0x80000021, 0, eax, 16, opcode_reclaim , Reserves opcode space 0x80000021, 0, eax, 17, user_cpuid_disable , #GP when executing CPUID at CPL > 0 is supported -0x80000021, 0, eax, 18, epsf_supported , Enhanced Predictive Store Forwarding (EPSF) is supported -0x80000021, 0, ebx, 11:0, microcode_patch_size , Size of microcode patch, in 16-byte units +0x80000021, 0, eax, 18, epsf , Enhanced Predictive Store Forwarding +0x80000021, 0, eax, 22, wl_feedback , Workload-based heuristic feedback to OS +0x80000021, 0, eax, 24, eraps , Enhanced Return Address Predictor Security +0x80000021, 0, eax, 27, sbpb , Selective Branch Predictor Barrier +0x80000021, 0, eax, 28, ibpb_brtype , Branch predictions flushed from CPU branch predictor +0x80000021, 0, eax, 29, srso_no , CPU is not subject to the SRSO vulnerability +0x80000021, 0, eax, 30, srso_uk_no , CPU is not vulnerable to SRSO at user-kernel boundary +0x80000021, 0, eax, 31, srso_msr_fix , Software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO +0x80000021, 0, ebx, 15:0, microcode_patch_size , Size of microcode patch, in 16-byte units +0x80000021, 0, ebx, 23:16, rap_size , Return Address Predictor size # Leaf 80000022H # AMD Performance Monitoring v2 enumeration @@ -1025,7 +1039,7 @@ 0x80000022, 0, eax, 0, perfmon_v2 , Performance monitoring v2 supported 0x80000022, 0, eax, 1, lbr_v2 , Last Branch Record v2 extensions (LBR Stack) 0x80000022, 0, eax, 2, lbr_pmc_freeze , Freezing core performance counters / LBR Stack supported -0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core perfomance counters +0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core performance counters 0x80000022, 0, ebx, 9:4, lbr_v2_stack_size , Number of available LBR stack entries 0x80000022, 0, ebx, 15:10, n_pmc_northbridge , Number of available northbridge (data fabric) performance counters 0x80000022, 0, ebx, 21:16, n_pmc_umc , Number of available UMC performance counters @@ -1035,7 +1049,7 @@ # AMD Secure Multi-key Encryption enumeration 0x80000023, 0, eax, 0, mem_hmk_mode , MEM-HMK encryption mode is supported -0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total num of available encryption keys +0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total number of available encryption keys # Leaf 80000026H # AMD extended topology enumeration v2 @@ -1051,3 +1065,108 @@ 0x80000026, 3:0, ecx, 7:0, domain_level , This domain level (subleaf ID) 0x80000026, 3:0, ecx, 15:8, domain_type , This domain type 0x80000026, 3:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU + +# Leaf 80860000H +# Maximum Transmeta leaf number + CPU vendor ID string + +0x80860000, 0, eax, 31:0, max_tra_leaf , Maximum supported Transmeta leaf number +0x80860000, 0, ebx, 31:0, cpu_vendorid_0 , Transmeta Vendor ID string bytes 0 - 3 +0x80860000, 0, ecx, 31:0, cpu_vendorid_2 , Transmeta Vendor ID string bytes 8 - 11 +0x80860000, 0, edx, 31:0, cpu_vendorid_1 , Transmeta Vendor ID string bytes 4 - 7 + +# Leaf 80860001H +# Transmeta extended CPU information + +0x80860001, 0, eax, 3:0, stepping , Stepping ID +0x80860001, 0, eax, 7:4, base_model , Base CPU model ID +0x80860001, 0, eax, 11:8, base_family_id , Base CPU family ID +0x80860001, 0, eax, 13:12, cpu_type , CPU type +0x80860001, 0, ebx, 7:0, cpu_rev_mask_minor , CPU revision ID, mask minor +0x80860001, 0, ebx, 15:8, cpu_rev_mask_major , CPU revision ID, mask major +0x80860001, 0, ebx, 23:16, cpu_rev_minor , CPU revision ID, minor +0x80860001, 0, ebx, 31:24, cpu_rev_major , CPU revision ID, major +0x80860001, 0, ecx, 31:0, cpu_base_mhz , CPU nominal frequency, in MHz +0x80860001, 0, edx, 0, recovery , Recovery CMS is active (after bad flush) +0x80860001, 0, edx, 1, longrun , LongRun power management capabilities +0x80860001, 0, edx, 3, lrti , LongRun Table Interface + +# Leaf 80860002H +# Transmeta Code Morphing Software (CMS) enumeration + +0x80860002, 0, eax, 31:0, cpu_rev_id , CPU revision ID +0x80860002, 0, ebx, 7:0, cms_rev_mask_2 , CMS revision ID, mask component 2 +0x80860002, 0, ebx, 15:8, cms_rev_mask_1 , CMS revision ID, mask component 1 +0x80860002, 0, ebx, 23:16, cms_rev_minor , CMS revision ID, minor +0x80860002, 0, ebx, 31:24, cms_rev_major , CMS revision ID, major +0x80860002, 0, ecx, 31:0, cms_rev_mask_3 , CMS revision ID, mask component 3 + +# Leaf 80860003H +# Transmeta CPU information string, bytes 0 - 15 + +0x80860003, 0, eax, 31:0, cpu_info_0 , CPU info string bytes 0 - 3 +0x80860003, 0, ebx, 31:0, cpu_info_1 , CPU info string bytes 4 - 7 +0x80860003, 0, ecx, 31:0, cpu_info_2 , CPU info string bytes 8 - 11 +0x80860003, 0, edx, 31:0, cpu_info_3 , CPU info string bytes 12 - 15 + +# Leaf 80860004H +# Transmeta CPU information string, bytes 16 - 31 + +0x80860004, 0, eax, 31:0, cpu_info_4 , CPU info string bytes 16 - 19 +0x80860004, 0, ebx, 31:0, cpu_info_5 , CPU info string bytes 20 - 23 +0x80860004, 0, ecx, 31:0, cpu_info_6 , CPU info string bytes 24 - 27 +0x80860004, 0, edx, 31:0, cpu_info_7 , CPU info string bytes 28 - 31 + +# Leaf 80860005H +# Transmeta CPU information string, bytes 32 - 47 + +0x80860005, 0, eax, 31:0, cpu_info_8 , CPU info string bytes 32 - 35 +0x80860005, 0, ebx, 31:0, cpu_info_9 , CPU info string bytes 36 - 39 +0x80860005, 0, ecx, 31:0, cpu_info_10 , CPU info string bytes 40 - 43 +0x80860005, 0, edx, 31:0, cpu_info_11 , CPU info string bytes 44 - 47 + +# Leaf 80860006H +# Transmeta CPU information string, bytes 48 - 63 + +0x80860006, 0, eax, 31:0, cpu_info_12 , CPU info string bytes 48 - 51 +0x80860006, 0, ebx, 31:0, cpu_info_13 , CPU info string bytes 52 - 55 +0x80860006, 0, ecx, 31:0, cpu_info_14 , CPU info string bytes 56 - 59 +0x80860006, 0, edx, 31:0, cpu_info_15 , CPU info string bytes 60 - 63 + +# Leaf 80860007H +# Transmeta live CPU information + +0x80860007, 0, eax, 31:0, cpu_cur_mhz , Current CPU frequency, in MHz +0x80860007, 0, ebx, 31:0, cpu_cur_voltage , Current CPU voltage, in millivolts +0x80860007, 0, ecx, 31:0, cpu_cur_perf_pctg , Current CPU performance percentage, 0 - 100 +0x80860007, 0, edx, 31:0, cpu_cur_gate_delay , Current CPU gate delay, in femtoseconds + +# Leaf C0000000H +# Maximum Centaur/Zhaoxin leaf number + +0xc0000000, 0, eax, 31:0, max_cntr_leaf , Maximum Centaur/Zhaoxin leaf number + +# Leaf C0000001H +# Centaur/Zhaoxin extended CPU features + +0xc0000001, 0, edx, 0, ccs_sm2 , CCS SM2 instructions +0xc0000001, 0, edx, 1, ccs_sm2_en , CCS SM2 enabled +0xc0000001, 0, edx, 2, xstore , Random Number Generator +0xc0000001, 0, edx, 3, xstore_en , RNG enabled +0xc0000001, 0, edx, 4, ccs_sm3_sm4 , CCS SM3 and SM4 instructions +0xc0000001, 0, edx, 5, ccs_sm3_sm4_en , CCS SM3/SM4 enabled +0xc0000001, 0, edx, 6, ace , Advanced Cryptography Engine +0xc0000001, 0, edx, 7, ace_en , ACE enabled +0xc0000001, 0, edx, 8, ace2 , Advanced Cryptography Engine v2 +0xc0000001, 0, edx, 9, ace2_en , ACE v2 enabled +0xc0000001, 0, edx, 10, phe , PadLock Hash Engine +0xc0000001, 0, edx, 11, phe_en , PHE enabled +0xc0000001, 0, edx, 12, pmm , PadLock Montgomery Multiplier +0xc0000001, 0, edx, 13, pmm_en , PMM enabled +0xc0000001, 0, edx, 16, parallax , Parallax auto adjust processor voltage +0xc0000001, 0, edx, 17, parallax_en , Parallax enabled +0xc0000001, 0, edx, 20, tm3 , Thermal Monitor v3 +0xc0000001, 0, edx, 21, tm3_en , TM v3 enabled +0xc0000001, 0, edx, 25, phe2 , PadLock Hash Engine v2 (SHA384/SHA512) +0xc0000001, 0, edx, 26, phe2_en , PHE v2 enabled +0xc0000001, 0, edx, 27, rsa , RSA instructions (XMODEXP/MONTMUL2) +0xc0000001, 0, edx, 28, rsa_en , RSA instructions enabled diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 1b25c0a95d3f..7dc6b9235d02 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -1,14 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE -#include <stdio.h> +#include <cpuid.h> +#include <err.h> +#include <getopt.h> #include <stdbool.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> -#include <getopt.h> #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define min(a, b) (((a) < (b)) ? (a) : (b)) +#define __noreturn __attribute__((__noreturn__)) typedef unsigned int u32; typedef unsigned long long u64; @@ -49,7 +52,7 @@ static const char * const reg_names[] = { struct subleaf { u32 index; u32 sub; - u32 eax, ebx, ecx, edx; + u32 output[NR_REGS]; struct reg_desc info[NR_REGS]; }; @@ -63,21 +66,64 @@ struct cpuid_func { int nr; }; +enum range_index { + RANGE_STD = 0, /* Standard */ + RANGE_EXT = 0x80000000, /* Extended */ + RANGE_TSM = 0x80860000, /* Transmeta */ + RANGE_CTR = 0xc0000000, /* Centaur/Zhaoxin */ +}; + +#define CPUID_INDEX_MASK 0xffff0000 +#define CPUID_FUNCTION_MASK (~CPUID_INDEX_MASK) + struct cpuid_range { /* array of main leafs */ struct cpuid_func *funcs; /* number of valid leafs */ int nr; - bool is_ext; + enum range_index index; }; -/* - * basic: basic functions range: [0... ] - * ext: extended functions range: [0x80000000... ] - */ -struct cpuid_range *leafs_basic, *leafs_ext; +static struct cpuid_range ranges[] = { + { .index = RANGE_STD, }, + { .index = RANGE_EXT, }, + { .index = RANGE_TSM, }, + { .index = RANGE_CTR, }, +}; + +static char *range_to_str(struct cpuid_range *range) +{ + switch (range->index) { + case RANGE_STD: return "Standard"; + case RANGE_EXT: return "Extended"; + case RANGE_TSM: return "Transmeta"; + case RANGE_CTR: return "Centaur"; + default: return NULL; + } +} + +#define __for_each_cpuid_range(range, __condition) \ + for (unsigned int i = 0; \ + i < ARRAY_SIZE(ranges) && ((range) = &ranges[i]) && (__condition); \ + i++) + +#define for_each_valid_cpuid_range(range) __for_each_cpuid_range(range, (range)->nr != 0) +#define for_each_cpuid_range(range) __for_each_cpuid_range(range, true) + +struct cpuid_range *index_to_cpuid_range(u32 index) +{ + u32 func_idx = index & CPUID_FUNCTION_MASK; + u32 range_idx = index & CPUID_INDEX_MASK; + struct cpuid_range *range; + + for_each_valid_cpuid_range(range) { + if (range->index == range_idx && (u32)range->nr > func_idx) + return range; + } + + return NULL; +} -static bool is_amd; static bool show_details; static bool show_raw; static bool show_flags_only = true; @@ -85,16 +131,16 @@ static u32 user_index = 0xFFFFFFFF; static u32 user_sub = 0xFFFFFFFF; static int flines; -static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} +/* + * Force using <cpuid.h> __cpuid_count() instead of __cpuid(). The + * latter leaves ECX uninitialized, which can break CPUID queries. + */ + +#define cpuid(leaf, a, b, c, d) \ + __cpuid_count(leaf, 0, a, b, c, d) + +#define cpuid_count(leaf, subleaf, a, b, c, d) \ + __cpuid_count(leaf, subleaf, a, b, c, d) static inline bool has_subleafs(u32 f) { @@ -117,11 +163,11 @@ static void leaf_print_raw(struct subleaf *leaf) if (leaf->sub == 0) printf("0x%08x: subleafs:\n", leaf->index); - printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", - leaf->sub, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx); + printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->sub, + leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]); } else { - printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", - leaf->index, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx); + printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->index, + leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]); } } @@ -140,19 +186,19 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, * Cut off vendor-prefix from CPUID function as we're using it as an * index into ->funcs. */ - func = &range->funcs[f & 0xffff]; + func = &range->funcs[f & CPUID_FUNCTION_MASK]; if (!func->leafs) { func->leafs = malloc(sizeof(struct subleaf)); if (!func->leafs) - perror("malloc func leaf"); + err(EXIT_FAILURE, NULL); func->nr = 1; } else { s = func->nr; func->leafs = realloc(func->leafs, (s + 1) * sizeof(*leaf)); if (!func->leafs) - perror("realloc f->leafs"); + err(EXIT_FAILURE, NULL); func->nr++; } @@ -161,84 +207,73 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, leaf->index = f; leaf->sub = subleaf; - leaf->eax = a; - leaf->ebx = b; - leaf->ecx = c; - leaf->edx = d; + leaf->output[R_EAX] = a; + leaf->output[R_EBX] = b; + leaf->output[R_ECX] = c; + leaf->output[R_EDX] = d; return false; } static void raw_dump_range(struct cpuid_range *range) { - u32 f; - int i; - - printf("%s Leafs :\n", range->is_ext ? "Extended" : "Basic"); + printf("%s Leafs :\n", range_to_str(range)); printf("================\n"); - for (f = 0; (int)f < range->nr; f++) { + for (u32 f = 0; (int)f < range->nr; f++) { struct cpuid_func *func = &range->funcs[f]; - u32 index = f; - - if (range->is_ext) - index += 0x80000000; /* Skip leaf without valid items */ if (!func->nr) continue; /* First item is the main leaf, followed by all subleafs */ - for (i = 0; i < func->nr; i++) + for (int i = 0; i < func->nr; i++) leaf_print_raw(&func->leafs[i]); } } #define MAX_SUBLEAF_NUM 64 -struct cpuid_range *setup_cpuid_range(u32 input_eax) +#define MAX_RANGE_INDEX_OFFSET 0xff +void setup_cpuid_range(struct cpuid_range *range) { - u32 max_func, idx_func, subleaf, max_subleaf; - u32 eax, ebx, ecx, edx, f = input_eax; - struct cpuid_range *range; - bool allzero; - - eax = input_eax; - ebx = ecx = edx = 0; + u32 max_func, range_funcs_sz; + u32 eax, ebx, ecx, edx; - cpuid(&eax, &ebx, &ecx, &edx); - max_func = eax; - idx_func = (max_func & 0xffff) + 1; + cpuid(range->index, max_func, ebx, ecx, edx); - range = malloc(sizeof(struct cpuid_range)); - if (!range) - perror("malloc range"); + /* + * If the CPUID range's maximum function value is garbage, then it + * is not recognized by this CPU. Set the range's number of valid + * leaves to zero so that for_each_valid_cpu_range() can ignore it. + */ + if (max_func < range->index || max_func > (range->index + MAX_RANGE_INDEX_OFFSET)) { + range->nr = 0; + return; + } - if (input_eax & 0x80000000) - range->is_ext = true; - else - range->is_ext = false; + range->nr = (max_func & CPUID_FUNCTION_MASK) + 1; + range_funcs_sz = range->nr * sizeof(struct cpuid_func); - range->funcs = malloc(sizeof(struct cpuid_func) * idx_func); + range->funcs = malloc(range_funcs_sz); if (!range->funcs) - perror("malloc range->funcs"); + err(EXIT_FAILURE, NULL); - range->nr = idx_func; - memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func); + memset(range->funcs, 0, range_funcs_sz); - for (; f <= max_func; f++) { - eax = f; - subleaf = ecx = 0; + for (u32 f = range->index; f <= max_func; f++) { + u32 max_subleaf = MAX_SUBLEAF_NUM; + bool allzero; - cpuid(&eax, &ebx, &ecx, &edx); - allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx); + cpuid(f, eax, ebx, ecx, edx); + + allzero = cpuid_store(range, f, 0, eax, ebx, ecx, edx); if (allzero) continue; if (!has_subleafs(f)) continue; - max_subleaf = MAX_SUBLEAF_NUM; - /* * Some can provide the exact number of subleafs, * others have to be tried (0xf) @@ -256,20 +291,15 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) if (f == 0x80000026) max_subleaf = 5; - for (subleaf = 1; subleaf < max_subleaf; subleaf++) { - eax = f; - ecx = subleaf; + for (u32 subleaf = 1; subleaf < max_subleaf; subleaf++) { + cpuid_count(f, subleaf, eax, ebx, ecx, edx); - cpuid(&eax, &ebx, &ecx, &edx); - allzero = cpuid_store(range, f, subleaf, - eax, ebx, ecx, edx); + allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx); if (allzero) continue; } } - - return range; } /* @@ -280,15 +310,13 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) * 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs * 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3) */ -static int parse_line(char *line) +static void parse_line(char *line) { char *str; - int i; struct cpuid_range *range; struct cpuid_func *func; struct subleaf *leaf; u32 index; - u32 sub; char buffer[512]; char *buf; /* @@ -310,12 +338,12 @@ static int parse_line(char *line) /* Skip comments and NULL line */ if (line[0] == '#' || line[0] == '\n') - return 0; + return; strncpy(buffer, line, 511); buffer[511] = 0; str = buffer; - for (i = 0; i < 5; i++) { + for (int i = 0; i < 5; i++) { tokens[i] = strtok(str, ","); if (!tokens[i]) goto err_exit; @@ -328,21 +356,19 @@ static int parse_line(char *line) /* index/main-leaf */ index = strtoull(tokens[0], NULL, 0); - if (index & 0x80000000) - range = leafs_ext; - else - range = leafs_basic; - - index &= 0x7FFFFFFF; - /* Skip line parsing for non-existing indexes */ - if ((int)index >= range->nr) - return -1; + /* + * Skip line parsing if the index is not covered by known-valid + * CPUID ranges on this CPU. + */ + range = index_to_cpuid_range(index); + if (!range) + return; + /* Skip line parsing if the index CPUID output is all zero */ + index &= CPUID_FUNCTION_MASK; func = &range->funcs[index]; - - /* Return if the index has no valid item on this platform */ if (!func->nr) - return 0; + return; /* subleaf */ buf = tokens[1]; @@ -355,11 +381,11 @@ static int parse_line(char *line) subleaf_start = strtoul(start, NULL, 0); subleaf_end = min(subleaf_end, (u32)(func->nr - 1)); if (subleaf_start > subleaf_end) - return 0; + return; } else { subleaf_start = subleaf_end; if (subleaf_start > (u32)(func->nr - 1)) - return 0; + return; } /* register */ @@ -382,7 +408,7 @@ static int parse_line(char *line) bit_end = strtoul(end, NULL, 0); bit_start = (start) ? strtoul(start, NULL, 0) : bit_end; - for (sub = subleaf_start; sub <= subleaf_end; sub++) { + for (u32 sub = subleaf_start; sub <= subleaf_end; sub++) { leaf = &func->leafs[sub]; reg = &leaf->info[reg_index]; bdesc = ®->descs[reg->nr++]; @@ -392,12 +418,11 @@ static int parse_line(char *line) strcpy(bdesc->simp, strtok(tokens[4], " \t")); strcpy(bdesc->detail, tokens[5]); } - return 0; + return; err_exit: - printf("Warning: wrong line format:\n"); - printf("\tline[%d]: %s\n", flines, line); - return -1; + warnx("Wrong line format:\n" + "\tline[%d]: %s", flines, line); } /* Parse csv file, and construct the array of all leafs and subleafs */ @@ -418,10 +443,8 @@ static void parse_text(void) file = fopen("./cpuid.csv", "r"); } - if (!file) { - printf("Fail to open '%s'\n", filename); - return; - } + if (!file) + err(EXIT_FAILURE, "%s", filename); while (1) { ret = getline(&line, &len, file); @@ -436,21 +459,13 @@ static void parse_text(void) fclose(file); } - -/* Decode every eax/ebx/ecx/edx */ -static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg) +static void show_reg(const struct reg_desc *rdesc, u32 value) { - struct bits_desc *bdesc; - int start, end, i; + const struct bits_desc *bdesc; + int start, end; u32 mask; - if (!rdesc->nr) { - if (show_details) - printf("\t %s: 0x%08x\n", reg_names[reg], value); - return; - } - - for (i = 0; i < rdesc->nr; i++) { + for (int i = 0; i < rdesc->nr; i++) { bdesc = &rdesc->descs[i]; start = bdesc->start; @@ -480,23 +495,21 @@ static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg) } } -static void show_leaf(struct subleaf *leaf) +static void show_reg_header(bool has_entries, u32 leaf, u32 subleaf, const char *reg_name) { - if (!leaf) - return; + if (show_details && has_entries) + printf("CPUID_0x%x_%s[0x%x]:\n", leaf, reg_name, subleaf); +} - if (show_raw) { +static void show_leaf(struct subleaf *leaf) +{ + if (show_raw) leaf_print_raw(leaf); - } else { - if (show_details) - printf("CPUID_0x%x_ECX[0x%x]:\n", - leaf->index, leaf->sub); - } - decode_bits(leaf->eax, &leaf->info[R_EAX], R_EAX); - decode_bits(leaf->ebx, &leaf->info[R_EBX], R_EBX); - decode_bits(leaf->ecx, &leaf->info[R_ECX], R_ECX); - decode_bits(leaf->edx, &leaf->info[R_EDX], R_EDX); + for (int i = R_EAX; i < NR_REGS; i++) { + show_reg_header((leaf->info[i].nr > 0), leaf->index, leaf->sub, reg_names[i]); + show_reg(&leaf->info[i], leaf->output[i]); + } if (!show_raw && show_details) printf("\n"); @@ -504,46 +517,37 @@ static void show_leaf(struct subleaf *leaf) static void show_func(struct cpuid_func *func) { - int i; - - if (!func) - return; - - for (i = 0; i < func->nr; i++) + for (int i = 0; i < func->nr; i++) show_leaf(&func->leafs[i]); } static void show_range(struct cpuid_range *range) { - int i; - - for (i = 0; i < range->nr; i++) + for (int i = 0; i < range->nr; i++) show_func(&range->funcs[i]); } static inline struct cpuid_func *index_to_func(u32 index) { + u32 func_idx = index & CPUID_FUNCTION_MASK; struct cpuid_range *range; - u32 func_idx; - - range = (index & 0x80000000) ? leafs_ext : leafs_basic; - func_idx = index & 0xffff; - if ((func_idx + 1) > (u32)range->nr) { - printf("ERR: invalid input index (0x%x)\n", index); + range = index_to_cpuid_range(index); + if (!range) return NULL; - } + return &range->funcs[func_idx]; } static void show_info(void) { + struct cpuid_range *range; struct cpuid_func *func; if (show_raw) { /* Show all of the raw output of 'cpuid' instr */ - raw_dump_range(leafs_basic); - raw_dump_range(leafs_ext); + for_each_valid_cpuid_range(range) + raw_dump_range(range); return; } @@ -551,18 +555,19 @@ static void show_info(void) /* Only show specific leaf/subleaf info */ func = index_to_func(user_index); if (!func) - return; + errx(EXIT_FAILURE, "Invalid input leaf (0x%x)", user_index); /* Dump the raw data also */ show_raw = true; if (user_sub != 0xFFFFFFFF) { - if (user_sub + 1 <= (u32)func->nr) { - show_leaf(&func->leafs[user_sub]); - return; + if (user_sub + 1 > (u32)func->nr) { + errx(EXIT_FAILURE, "Leaf 0x%x has no valid subleaf = 0x%x", + user_index, user_sub); } - printf("ERR: invalid input subleaf (0x%x)\n", user_sub); + show_leaf(&func->leafs[user_sub]); + return; } show_func(func); @@ -570,38 +575,21 @@ static void show_info(void) } printf("CPU features:\n=============\n\n"); - show_range(leafs_basic); - show_range(leafs_ext); + for_each_valid_cpuid_range(range) + show_range(range); } -static void setup_platform_cpuid(void) +static void __noreturn usage(int exit_code) { - u32 eax, ebx, ecx, edx; - - /* Check vendor */ - eax = ebx = ecx = edx = 0; - cpuid(&eax, &ebx, &ecx, &edx); - - /* "htuA" */ - if (ebx == 0x68747541) - is_amd = true; - - /* Setup leafs for the basic and extended range */ - leafs_basic = setup_cpuid_range(0x0); - leafs_ext = setup_cpuid_range(0x80000000); -} - -static void usage(void) -{ - printf("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" - "\t-a|--all Show both bit flags and complex bit fields info\n" - "\t-b|--bitflags Show boolean flags only\n" - "\t-d|--detail Show details of the flag/fields (default)\n" - "\t-f|--flags Specify the cpuid csv file\n" - "\t-h|--help Show usage info\n" - "\t-l|--leaf=index Specify the leaf you want to check\n" - "\t-r|--raw Show raw cpuid data\n" - "\t-s|--subleaf=sub Specify the subleaf you want to check\n" + errx(exit_code, "kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" + "\t-a|--all Show both bit flags and complex bit fields info\n" + "\t-b|--bitflags Show boolean flags only\n" + "\t-d|--detail Show details of the flag/fields (default)\n" + "\t-f|--flags Specify the CPUID CSV file\n" + "\t-h|--help Show usage info\n" + "\t-l|--leaf=index Specify the leaf you want to check\n" + "\t-r|--raw Show raw CPUID data\n" + "\t-s|--subleaf=sub Specify the subleaf you want to check" ); } @@ -617,7 +605,7 @@ static struct option opts[] = { { NULL, 0, NULL, 0 } }; -static int parse_options(int argc, char *argv[]) +static void parse_options(int argc, char *argv[]) { int c; @@ -637,9 +625,7 @@ static int parse_options(int argc, char *argv[]) user_csv = optarg; break; case 'h': - usage(); - exit(1); - break; + usage(EXIT_SUCCESS); case 'l': /* main leaf */ user_index = strtoul(optarg, NULL, 0); @@ -652,11 +638,8 @@ static int parse_options(int argc, char *argv[]) user_sub = strtoul(optarg, NULL, 0); break; default: - printf("%s: Invalid option '%c'\n", argv[0], optopt); - return -1; - } - - return 0; + usage(EXIT_FAILURE); + } } /* @@ -669,11 +652,13 @@ static int parse_options(int argc, char *argv[]) */ int main(int argc, char *argv[]) { - if (parse_options(argc, argv)) - return -1; + struct cpuid_range *range; + + parse_options(argc, argv); /* Setup the cpuid leafs of current platform */ - setup_platform_cpuid(); + for_each_cpuid_range(range) + setup_cpuid_range(range); /* Read and parse the 'cpuid.csv' */ parse_text(); diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index e91d4c4e1c16..bce69c6bfa69 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -324,6 +324,11 @@ int insn_get_opcode(struct insn *insn) } insn->attr = inat_get_opcode_attribute(op); + if (insn->x86_64 && inat_is_invalid64(insn->attr)) { + /* This instruction is invalid, like UD2. Stop decoding. */ + insn->attr &= INAT_INV64; + } + while (inat_is_escape(insn->attr)) { /* Get escaped opcode */ op = get_next(insn_byte_t, insn); @@ -337,6 +342,7 @@ int insn_get_opcode(struct insn *insn) insn->attr = 0; return -EINVAL; } + end: opcode->got = 1; return 0; @@ -658,7 +664,6 @@ int insn_get_immediate(struct insn *insn) } if (!inat_has_immediate(insn->attr)) - /* no immediates */ goto done; switch (inat_immediate_size(insn->attr)) { diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index f5dd84eb55dc..262f7ca1fb95 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -35,7 +35,7 @@ # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) # - (66&F2): Both 0x66 and 0xF2 prefixes are specified. # -# REX2 Prefix +# REX2 Prefix Superscripts # - (!REX2): REX2 is not allowed # - (REX2): REX2 variant e.g. JMPABS @@ -147,7 +147,7 @@ AVXcode: # 0x60 - 0x6f 60: PUSHA/PUSHAD (i64) 61: POPA/POPAD (i64) -62: BOUND Gv,Ma (i64) | EVEX (Prefix) +62: BOUND Gv,Ma (i64) | EVEX (Prefix),(o64) 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) 64: SEG=FS (Prefix) 65: SEG=GS (Prefix) @@ -253,8 +253,8 @@ c0: Grp2 Eb,Ib (1A) c1: Grp2 Ev,Ib (1A) c2: RETN Iw (f64) c3: RETN -c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) -c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) +c4: LES Gz,Mp (i64) | VEX+2byte (Prefix),(o64) +c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix),(o64) c6: Grp11A Eb,Ib (1A) c7: Grp11B Ev,Iz (1A) c8: ENTER Iw,Ib @@ -286,10 +286,10 @@ df: ESC # Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix # in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation # to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. -e0: LOOPNE/LOOPNZ Jb (f64) (!REX2) -e1: LOOPE/LOOPZ Jb (f64) (!REX2) -e2: LOOP Jb (f64) (!REX2) -e3: JrCXZ Jb (f64) (!REX2) +e0: LOOPNE/LOOPNZ Jb (f64),(!REX2) +e1: LOOPE/LOOPZ Jb (f64),(!REX2) +e2: LOOP Jb (f64),(!REX2) +e3: JrCXZ Jb (f64),(!REX2) e4: IN AL,Ib (!REX2) e5: IN eAX,Ib (!REX2) e6: OUT Ib,AL (!REX2) @@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2) # in "near" jumps and calls is 16-bit. For CALL, # push of return address is 16-bit wide, RSP is decremented by 2 # but is not truncated to 16 bits, unlike RIP. -e8: CALL Jz (f64) (!REX2) -e9: JMP-near Jz (f64) (!REX2) -ea: JMP-far Ap (i64) (!REX2) -eb: JMP-short Jb (f64) (!REX2) +e8: CALL Jz (f64),(!REX2) +e9: JMP-near Jz (f64),(!REX2) +ea: JMP-far Ap (i64),(!REX2) +eb: JMP-short Jb (f64),(!REX2) ec: IN AL,DX (!REX2) ed: IN eAX,DX (!REX2) ee: OUT DX,AL (!REX2) @@ -478,22 +478,22 @@ AVXcode: 1 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). -80: JO Jz (f64) (!REX2) -81: JNO Jz (f64) (!REX2) -82: JB/JC/JNAE Jz (f64) (!REX2) -83: JAE/JNB/JNC Jz (f64) (!REX2) -84: JE/JZ Jz (f64) (!REX2) -85: JNE/JNZ Jz (f64) (!REX2) -86: JBE/JNA Jz (f64) (!REX2) -87: JA/JNBE Jz (f64) (!REX2) -88: JS Jz (f64) (!REX2) -89: JNS Jz (f64) (!REX2) -8a: JP/JPE Jz (f64) (!REX2) -8b: JNP/JPO Jz (f64) (!REX2) -8c: JL/JNGE Jz (f64) (!REX2) -8d: JNL/JGE Jz (f64) (!REX2) -8e: JLE/JNG Jz (f64) (!REX2) -8f: JNLE/JG Jz (f64) (!REX2) +80: JO Jz (f64),(!REX2) +81: JNO Jz (f64),(!REX2) +82: JB/JC/JNAE Jz (f64),(!REX2) +83: JAE/JNB/JNC Jz (f64),(!REX2) +84: JE/JZ Jz (f64),(!REX2) +85: JNE/JNZ Jz (f64),(!REX2) +86: JBE/JNA Jz (f64),(!REX2) +87: JA/JNBE Jz (f64),(!REX2) +88: JS Jz (f64),(!REX2) +89: JNS Jz (f64),(!REX2) +8a: JP/JPE Jz (f64),(!REX2) +8b: JNP/JPO Jz (f64),(!REX2) +8c: JL/JNGE Jz (f64),(!REX2) +8d: JNL/JGE Jz (f64),(!REX2) +8e: JLE/JNG Jz (f64),(!REX2) +8f: JNLE/JG Jz (f64),(!REX2) # 0x0f 0x90-0x9f 90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) 91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk index 5770c8097f32..2c19d7fc8a85 100644 --- a/tools/arch/x86/tools/gen-insn-attr-x86.awk +++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk @@ -64,6 +64,8 @@ BEGIN { modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" force64_expr = "\\([df]64\\)" + invalid64_expr = "\\(i64\\)" + only64_expr = "\\(o64\\)" rex_expr = "^((REX(\\.[XRWB]+)+)|(REX$))" rex2_expr = "\\(REX2\\)" no_rex2_expr = "\\(!REX2\\)" @@ -319,6 +321,11 @@ function convert_operands(count,opnd, i,j,imm,mod) if (match(ext, force64_expr)) flags = add_flags(flags, "INAT_FORCE64") + # check invalid in 64-bit (and no only64) + if (match(ext, invalid64_expr) && + !match($0, only64_expr)) + flags = add_flags(flags, "INAT_INV64") + # check REX2 not allowed if (match(ext, no_rex2_expr)) flags = add_flags(flags, "INAT_NO_REX2") diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h new file mode 100644 index 000000000000..e710967c7c26 --- /dev/null +++ b/tools/include/uapi/linux/fanotify.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FANOTIFY_H +#define _UAPI_LINUX_FANOTIFY_H + +#include <linux/types.h> + +/* the following events that user-space can register for */ +#define FAN_ACCESS 0x00000001 /* File was accessed */ +#define FAN_MODIFY 0x00000002 /* File was modified */ +#define FAN_ATTRIB 0x00000004 /* Metadata changed */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ +#define FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FAN_CREATE 0x00000100 /* Subfile was created */ +#define FAN_DELETE 0x00000200 /* Subfile was deleted */ +#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FAN_MOVE_SELF 0x00000800 /* Self was moved */ +#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ + +#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FAN_FS_ERROR 0x00008000 /* Filesystem error */ + +#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ +#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ + +#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */ +#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ +#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ + +#define FAN_RENAME 0x10000000 /* File was renamed */ + +#define FAN_ONDIR 0x40000000 /* Event occurred against dir */ + +/* helper events */ +#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ +#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */ + +/* flags used for fanotify_init() */ +#define FAN_CLOEXEC 0x00000001 +#define FAN_NONBLOCK 0x00000002 + +/* These are NOT bitwise flags. Both bits are used together. */ +#define FAN_CLASS_NOTIF 0x00000000 +#define FAN_CLASS_CONTENT 0x00000004 +#define FAN_CLASS_PRE_CONTENT 0x00000008 + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FAN_UNLIMITED_QUEUE 0x00000010 +#define FAN_UNLIMITED_MARKS 0x00000020 +#define FAN_ENABLE_AUDIT 0x00000040 + +/* Flags to determine fanotify event format */ +#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */ +#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ +#define FAN_REPORT_FID 0x00000200 /* Report unique file id */ +#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ +#define FAN_REPORT_NAME 0x00000800 /* Report events with name */ +#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ +#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ +#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ + +/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ +#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) +/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */ +#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \ + FAN_REPORT_FID | FAN_REPORT_TARGET_FID) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ + FAN_UNLIMITED_MARKS) + +/* flags used for fanotify_modify_mark() */ +#define FAN_MARK_ADD 0x00000001 +#define FAN_MARK_REMOVE 0x00000002 +#define FAN_MARK_DONT_FOLLOW 0x00000004 +#define FAN_MARK_ONLYDIR 0x00000008 +/* FAN_MARK_MOUNT is 0x00000010 */ +#define FAN_MARK_IGNORED_MASK 0x00000020 +#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 +#define FAN_MARK_FLUSH 0x00000080 +/* FAN_MARK_FILESYSTEM is 0x00000100 */ +#define FAN_MARK_EVICTABLE 0x00000200 +/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */ +#define FAN_MARK_IGNORE 0x00000400 + +/* These are NOT bitwise flags. Both bits can be used togther. */ +#define FAN_MARK_INODE 0x00000000 +#define FAN_MARK_MOUNT 0x00000010 +#define FAN_MARK_FILESYSTEM 0x00000100 +#define FAN_MARK_MNTNS 0x00000110 + +/* + * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY + * for non-inode mark types. + */ +#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ + FAN_MARK_REMOVE |\ + FAN_MARK_DONT_FOLLOW |\ + FAN_MARK_ONLYDIR |\ + FAN_MARK_MOUNT |\ + FAN_MARK_IGNORED_MASK |\ + FAN_MARK_IGNORED_SURV_MODIFY |\ + FAN_MARK_FLUSH) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_EVENTS (FAN_ACCESS |\ + FAN_MODIFY |\ + FAN_CLOSE |\ + FAN_OPEN) + +/* + * All events which require a permission response from userspace + */ +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ + FAN_ACCESS_PERM) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ + FAN_ALL_PERM_EVENTS |\ + FAN_Q_OVERFLOW) + +#define FANOTIFY_METADATA_VERSION 3 + +struct fanotify_event_metadata { + __u32 event_len; + __u8 vers; + __u8 reserved; + __u16 metadata_len; + __aligned_u64 mask; + __s32 fd; + __s32 pid; +}; + +#define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 +#define FAN_EVENT_INFO_TYPE_DFID 3 +#define FAN_EVENT_INFO_TYPE_PIDFD 4 +#define FAN_EVENT_INFO_TYPE_ERROR 5 +#define FAN_EVENT_INFO_TYPE_RANGE 6 +#define FAN_EVENT_INFO_TYPE_MNT 7 + +/* Special info types for FAN_RENAME */ +#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 +/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */ +#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12 +/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */ + +/* Variable length info record following event metadata */ +struct fanotify_event_info_header { + __u8 info_type; + __u8 pad; + __u16 len; +}; + +/* + * Unique file identifier info record. + * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID, + * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME. + * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated + * name immediately after the file handle. + */ +struct fanotify_event_info_fid { + struct fanotify_event_info_header hdr; + __kernel_fsid_t fsid; + /* + * Following is an opaque struct file_handle that can be passed as + * an argument to open_by_handle_at(2). + */ + unsigned char handle[]; +}; + +/* + * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD. + * It holds a pidfd for the pid that was responsible for generating an event. + */ +struct fanotify_event_info_pidfd { + struct fanotify_event_info_header hdr; + __s32 pidfd; +}; + +struct fanotify_event_info_error { + struct fanotify_event_info_header hdr; + __s32 error; + __u32 error_count; +}; + +struct fanotify_event_info_range { + struct fanotify_event_info_header hdr; + __u32 pad; + __u64 offset; + __u64 count; +}; + +struct fanotify_event_info_mnt { + struct fanotify_event_info_header hdr; + __u64 mnt_id; +}; + +/* + * User space may need to record additional information about its decision. + * The extra information type records what kind of information is included. + * The default is none. We also define an extra information buffer whose + * size is determined by the extra information type. + * + * If the information type is Audit Rule, then the information following + * is the rule number that triggered the user space decision that + * requires auditing. + */ + +#define FAN_RESPONSE_INFO_NONE 0 +#define FAN_RESPONSE_INFO_AUDIT_RULE 1 + +struct fanotify_response { + __s32 fd; + __u32 response; +}; + +struct fanotify_response_info_header { + __u8 type; + __u8 pad; + __u16 len; +}; + +struct fanotify_response_info_audit_rule { + struct fanotify_response_info_header hdr; + __u32 rule_number; + __u32 subj_trust; + __u32 obj_trust; +}; + +/* Legit userspace responses to a _PERM event */ +#define FAN_ALLOW 0x01 +#define FAN_DENY 0x02 +/* errno other than EPERM can specified in upper byte of deny response */ +#define FAN_ERRNO_BITS 8 +#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS) +#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1) +#define FAN_DENY_ERRNO(err) \ + (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT)) + +#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */ +#define FAN_INFO 0x20 /* Bitmask to indicate additional information */ + +/* No fd set in event */ +#define FAN_NOFD -1 +#define FAN_NOPIDFD FAN_NOFD +#define FAN_EPIDFD -2 + +/* Helper functions to deal with fanotify_event_metadata buffers */ +#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) + +#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \ + (struct fanotify_event_metadata*)(((char *)(meta)) + \ + (meta)->event_len)) + +#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len <= (long)(len)) + +#endif /* _UAPI_LINUX_FANOTIFY_H */ diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h new file mode 100644 index 000000000000..7fa67c2031a5 --- /dev/null +++ b/tools/include/uapi/linux/mount.h @@ -0,0 +1,235 @@ +#ifndef _UAPI_LINUX_MOUNT_H +#define _UAPI_LINUX_MOUNT_H + +#include <linux/types.h> + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + * + * Usage of these is restricted within the kernel to core mount(2) code and + * callers of sys_mount() only. Filesystems should be using the SB_* + * equivalent instead. + */ +#define MS_RDONLY 1 /* Mount read-only */ +#define MS_NOSUID 2 /* Ignore suid and sgid bits */ +#define MS_NODEV 4 /* Disallow access to device special files */ +#define MS_NOEXEC 8 /* Disallow program execution */ +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ +#define MS_NOATIME 1024 /* Do not update access times. */ +#define MS_NODIRATIME 2048 /* Do not update directory access times */ +#define MS_BIND 4096 +#define MS_MOVE 8192 +#define MS_REC 16384 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#define MS_PRIVATE (1<<18) /* change to private */ +#define MS_SLAVE (1<<19) /* change to slave */ +#define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ + +/* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) +#define MS_NOREMOTELOCK (1<<27) +#define MS_NOSEC (1<<28) +#define MS_BORN (1<<29) +#define MS_ACTIVE (1<<30) +#define MS_NOUSER (1<<31) + +/* + * Superblock flags that can be altered by MS_REMOUNT + */ +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ + MS_LAZYTIME) + +/* + * Old magic mount flag and mask + */ +#define MS_MGC_VAL 0xC0ED0000 +#define MS_MGC_MSK 0xffff0000 + +/* + * open_tree() flags. + */ +#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ + +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */ +#define MOVE_MOUNT__MASK 0x00000377 + +/* + * fsopen() flags. + */ +#define FSOPEN_CLOEXEC 0x00000001 + +/* + * fspick() flags. + */ +#define FSPICK_CLOEXEC 0x00000001 +#define FSPICK_SYMLINK_NOFOLLOW 0x00000002 +#define FSPICK_NO_AUTOMOUNT 0x00000004 +#define FSPICK_EMPTY_PATH 0x00000008 + +/* + * The type of fsconfig() call made. + */ +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ + FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */ +}; + +/* + * fsmount() flags. + */ +#define FSMOUNT_CLOEXEC 0x00000001 + +/* + * Mount attributes. + */ +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ + +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. */ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + + +/* + * Structure for getting mount/superblock/filesystem info with statmount(2). + * + * The interface is similar to statx(2): individual fields or groups can be + * selected with the @mask argument of statmount(). Kernel will set the @mask + * field according to the supported fields. + * + * If string fields are selected, then the caller needs to pass a buffer that + * has space after the fixed part of the structure. Nul terminated strings are + * copied there and offsets relative to @str are stored in the relevant fields. + * If the buffer is too small, then EOVERFLOW is returned. The actually used + * size is returned in @size. + */ +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... */ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 supported_mask; /* Mask flags that this kernel supports */ + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ + __u64 __spare2[43]; + char str[]; /* Variable size part containing strings */ +}; + +/* + * Structure for passing mount ID and miscellaneous parameters to statmount(2) + * and listmount(2). + * + * For statmount(2) @param represents the request mask. + * For listmount(2) @param represents the last listed mount id (or zero). + */ +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; + __u64 mnt_ns_id; +}; + +/* List of all mnt_id_req versions. */ +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ + +/* + * @mask bits for statmount(2) + */ +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */ +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ + +/* + * Special @mnt_id values that can be passed to listmount + */ +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ + +#endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h new file mode 100644 index 000000000000..34127653fd00 --- /dev/null +++ b/tools/include/uapi/linux/nsfs.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_NSFS_H +#define __LINUX_NSFS_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +#define NSIO 0xb7 + +/* Returns a file descriptor that refers to an owning user namespace */ +#define NS_GET_USERNS _IO(NSIO, 0x1) +/* Returns a file descriptor that refers to a parent namespace */ +#define NS_GET_PARENT _IO(NSIO, 0x2) +/* Returns the type of namespace (CLONE_NEW* value) referred to by + file descriptor */ +#define NS_GET_NSTYPE _IO(NSIO, 0x3) +/* Get owner UID (in the caller's user namespace) for a user namespace */ +#define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Get the id for a mount namespace */ +#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) +/* Translate pid from target pid namespace into the caller's pid namespace. */ +#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) +/* Return thread-group leader id of pid in the callers pid namespace. */ +#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int) +/* Translate pid from caller's pid namespace into a target pid namespace. */ +#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int) +/* Return thread-group leader id of pid in the target pid namespace. */ +#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int) + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info) +/* Get next namespace. */ +#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) + +#endif /* __LINUX_NSFS_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 5fc753c23734..78a362b80027 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -39,18 +39,21 @@ enum perf_type_id { /* * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA * AA: hardware event ID * EEEEEEEE: PMU type ID + * * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB * BB: hardware cache ID * CC: hardware cache op ID * DD: hardware cache op result ID * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + * + * If the PMU type ID is 0, PERF_TYPE_RAW will be applied. */ -#define PERF_PMU_TYPE_SHIFT 32 -#define PERF_HW_EVENT_MASK 0xffffffff +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff /* * Generalized performance event event_id types, used by the @@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id { /* * Special "software" events provided by the kernel, even if the hardware * does not support performance events. These events measure various - * physical and sw events of the kernel (and allow the profiling of them as + * physical and SW events of the kernel (and allow the profiling of them as * well): */ enum perf_sw_ids { @@ -167,8 +170,9 @@ enum perf_event_sample_format { }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) + /* - * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set. * * If the user does not pass priv level information via branch_sample_type, * the kernel uses the event's priv level. Branch and event priv levels do @@ -178,20 +182,20 @@ enum perf_event_sample_format { * of branches and therefore it supersedes all the other types. */ enum perf_branch_sample_type_shift { - PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ @@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift { }; enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, - PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, - PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, - PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, - PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, - PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, - PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, - PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, - PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, - PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, - PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, - PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, - PERF_SAMPLE_BRANCH_TYPE_SAVE = - 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, - PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, - PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; /* - * Common flow change classification + * Common control flow change classifications: */ enum { - PERF_BR_UNKNOWN = 0, /* unknown */ - PERF_BR_COND = 1, /* conditional */ - PERF_BR_UNCOND = 2, /* unconditional */ - PERF_BR_IND = 3, /* indirect */ - PERF_BR_CALL = 4, /* function call */ - PERF_BR_IND_CALL = 5, /* indirect function call */ - PERF_BR_RET = 6, /* function return */ - PERF_BR_SYSCALL = 7, /* syscall */ - PERF_BR_SYSRET = 8, /* syscall return */ - PERF_BR_COND_CALL = 9, /* conditional function call */ - PERF_BR_COND_RET = 10, /* conditional function return */ - PERF_BR_ERET = 11, /* exception return */ - PERF_BR_IRQ = 12, /* irq */ - PERF_BR_SERROR = 13, /* system error */ - PERF_BR_NO_TX = 14, /* not in transaction */ - PERF_BR_EXTEND_ABI = 15, /* extend ABI */ + PERF_BR_UNKNOWN = 0, /* Unknown */ + PERF_BR_COND = 1, /* Conditional */ + PERF_BR_UNCOND = 2, /* Unconditional */ + PERF_BR_IND = 3, /* Indirect */ + PERF_BR_CALL = 4, /* Function call */ + PERF_BR_IND_CALL = 5, /* Indirect function call */ + PERF_BR_RET = 6, /* Function return */ + PERF_BR_SYSCALL = 7, /* Syscall */ + PERF_BR_SYSRET = 8, /* Syscall return */ + PERF_BR_COND_CALL = 9, /* Conditional function call */ + PERF_BR_COND_RET = 10, /* Conditional function return */ + PERF_BR_ERET = 11, /* Exception return */ + PERF_BR_IRQ = 12, /* IRQ */ + PERF_BR_SERROR = 13, /* System error */ + PERF_BR_NO_TX = 14, /* Not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* Extend ABI */ PERF_BR_MAX, }; /* - * Common branch speculation outcome classification + * Common branch speculation outcome classifications: */ enum { - PERF_BR_SPEC_NA = 0, /* Not available */ - PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ - PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ - PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ PERF_BR_SPEC_MAX, }; enum { - PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ - PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ - PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ - PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ - PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ - PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ - PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ - PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ PERF_BR_NEW_MAX, }; enum { - PERF_BR_PRIV_UNKNOWN = 0, - PERF_BR_PRIV_USER = 1, - PERF_BR_PRIV_KERNEL = 2, - PERF_BR_PRIV_HV = 3, + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, }; -#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 -#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 -#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 -#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 -#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ @@ -310,9 +313,9 @@ enum { * Values to determine ABI of the registers dump. */ enum perf_sample_regs_abi { - PERF_SAMPLE_REGS_ABI_NONE = 0, - PERF_SAMPLE_REGS_ABI_32 = 1, - PERF_SAMPLE_REGS_ABI_64 = 2, + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, }; /* @@ -320,21 +323,21 @@ enum perf_sample_regs_abi { * abort events. Multiple bits can be set. */ enum { - PERF_TXN_ELISION = (1 << 0), /* From elision */ - PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ - PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ - PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ - PERF_TXN_RETRY = (1 << 4), /* Retry possible */ - PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ - PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ - PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ - PERF_TXN_MAX = (1 << 8), /* non-ABI */ + PERF_TXN_MAX = (1 << 8), /* non-ABI */ - /* bits 32..63 are reserved for the abort code */ + /* Bits 32..63 are reserved for the abort code */ - PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), - PERF_TXN_ABORT_SHIFT = 32, + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, }; /* @@ -369,24 +372,22 @@ enum perf_event_read_format { PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ -#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ -#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ - /* add: sample_stack_user */ -#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ -#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ -#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ -#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ -#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ +#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */ +#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */ + /* Add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ /* - * Hardware event_id to monitor via a performance monitoring event: - * - * @sample_max_stack: Max number of frame pointers in a callchain, - * should be < /proc/sys/kernel/perf_event_max_stack - * Max number of entries of branch stack - * should be < hardware limit + * 'struct perf_event_attr' contains various attributes that define + * a performance event - most of them hardware related configuration + * details, but also a lot of behavioral switches and values implemented + * by the kernel. */ struct perf_event_attr { @@ -396,7 +397,7 @@ struct perf_event_attr { __u32 type; /* - * Size of the attr structure, for fwd/bwd compat. + * Size of the attr structure, for forward/backwards compatibility. */ __u32 size; @@ -451,21 +452,21 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - write_backward : 1, /* Write ring buffer from end to beginning */ + write_backward : 1, /* write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - bpf_event : 1, /* include bpf events */ + bpf_event : 1, /* include BPF events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - build_id : 1, /* use build id in mmap2 events */ + build_id : 1, /* use build ID in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ __reserved_1 : 26; union { - __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_events; /* wake up every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; @@ -474,13 +475,13 @@ struct perf_event_attr { __u64 bp_addr; __u64 kprobe_func; /* for perf_kprobe */ __u64 uprobe_path; /* for perf_uprobe */ - __u64 config1; /* extension of config */ + __u64 config1; /* extension of config */ }; union { __u64 bp_len; - __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 kprobe_addr; /* when kprobe_func == NULL */ __u64 probe_offset; /* for perf_[k,u]probe */ - __u64 config2; /* extension of config1 */ + __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ @@ -510,7 +511,16 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; + + /* + * Max number of frame pointers in a callchain, should be + * lower than /proc/sys/kernel/perf_event_max_stack. + * + * Max number of entries of branch stack should be lower + * than the hardware limit. + */ __u16 sample_max_stack; + __u16 __reserved_2; __u32 aux_sample_size; @@ -537,7 +547,7 @@ struct perf_event_attr { /* * Structure used by below PERF_EVENT_IOC_QUERY_BPF command - * to query bpf programs attached to the same perf tracepoint + * to query BPF programs attached to the same perf tracepoint * as the given perf event. */ struct perf_event_query_bpf { @@ -559,21 +569,21 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32) #define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) -#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, + PERF_IOC_FLAG_GROUP = 1U << 0, }; /* @@ -584,7 +594,7 @@ struct perf_event_mmap_page { __u32 compat_version; /* lowest version this is compat with */ /* - * Bits needed to read the hw events in user-space. + * Bits needed to read the HW events in user-space. * * u32 seq, time_mult, time_shift, index, width; * u64 count, enabled, running; @@ -622,7 +632,7 @@ struct perf_event_mmap_page { __u32 index; /* hardware event identifier */ __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ - __u64 time_running; /* time event on cpu */ + __u64 time_running; /* time event on CPU */ union { __u64 capabilities; struct { @@ -650,7 +660,7 @@ struct perf_event_mmap_page { /* * If cap_usr_time the below fields can be used to compute the time - * delta since time_enabled (in ns) using rdtsc or similar. + * delta since time_enabled (in ns) using RDTSC or similar. * * u64 quot, rem; * u64 delta; @@ -723,7 +733,7 @@ struct perf_event_mmap_page { * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data, after issueing + * written by user-space to reflect the last read data, after issuing * an smp_mb() to separate the data read from the ->data_tail store. * In this case the kernel will not over-write unread data. * @@ -739,7 +749,7 @@ struct perf_event_mmap_page { /* * AUX area is defined by aux_{offset,size} fields that should be set - * by the userspace, so that + * by the user-space, so that * * aux_offset >= data_offset + data_size * @@ -813,7 +823,7 @@ struct perf_event_mmap_page { * Indicates that thread was preempted in TASK_RUNNING state. * * PERF_RECORD_MISC_MMAP_BUILD_ID: - * Indicates that mmap2 event carries build id data. + * Indicates that mmap2 event carries build ID data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) @@ -824,26 +834,26 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; + __u32 type; + __u16 misc; + __u16 size; }; struct perf_ns_link_info { - __u64 dev; - __u64 ino; + __u64 dev; + __u64 ino; }; enum { - NET_NS_INDEX = 0, - UTS_NS_INDEX = 1, - IPC_NS_INDEX = 2, - PID_NS_INDEX = 3, - USER_NS_INDEX = 4, - MNT_NS_INDEX = 5, - CGROUP_NS_INDEX = 6, - - NR_NAMESPACES, /* number of available namespaces */ + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ }; enum perf_event_type { @@ -859,11 +869,11 @@ enum perf_event_type { * optional fields being ignored. * * struct sample_id { - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 id; } && PERF_SAMPLE_IDENTIFIER * } && perf_event_attr::sample_id_all * @@ -874,7 +884,7 @@ enum perf_event_type { /* * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: + * correlate user-space IPs to code. They have the following structure: * * struct { * struct perf_event_header header; @@ -884,7 +894,7 @@ enum perf_event_type { * u64 len; * u64 pgoff; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP = 1, @@ -894,7 +904,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 id; * u64 lost; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_LOST = 2, @@ -905,7 +915,7 @@ enum perf_event_type { * * u32 pid, tid; * char comm[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_COMM = 3, @@ -916,7 +926,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_EXIT = 4, @@ -927,7 +937,7 @@ enum perf_event_type { * u64 time; * u64 id; * u64 stream_id; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_THROTTLE = 5, @@ -939,7 +949,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_FORK = 7, @@ -950,7 +960,7 @@ enum perf_event_type { * u32 pid, tid; * * struct read_format values; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_READ = 8, @@ -1005,12 +1015,12 @@ enum perf_event_type { * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * - * { u64 abi; # enum perf_sample_regs_abi - * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * - * { u64 size; - * char data[size]; - * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { union perf_sample_weight * { @@ -1035,10 +1045,11 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR - * { u64 size; - * char data[size]; } && PERF_SAMPLE_AUX + * { u64 cgroup;} && PERF_SAMPLE_CGROUP * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, @@ -1070,7 +1081,7 @@ enum perf_event_type { * }; * u32 prot, flags; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP2 = 10, @@ -1079,12 +1090,12 @@ enum perf_event_type { * Records that new data landed in the AUX buffer part. * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u64 aux_offset; - * u64 aux_size; + * u64 aux_offset; + * u64 aux_size; * u64 flags; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_AUX = 11, @@ -1167,7 +1178,7 @@ enum perf_event_type { PERF_RECORD_KSYMBOL = 17, /* - * Record bpf events: + * Record BPF events: * enum perf_bpf_event_type { * PERF_BPF_EVENT_UNKNOWN = 0, * PERF_BPF_EVENT_PROG_LOAD = 1, @@ -1245,181 +1256,181 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) enum perf_bpf_event_type { - PERF_BPF_EVENT_UNKNOWN = 0, - PERF_BPF_EVENT_PROG_LOAD = 1, - PERF_BPF_EVENT_PROG_UNLOAD = 2, - PERF_BPF_EVENT_MAX, /* non-ABI */ + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 127 -#define PERF_MAX_CONTEXTS_PER_STACK 8 +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, - PERF_CONTEXT_MAX = (__u64)-4095, + PERF_CONTEXT_MAX = (__u64)-4095, }; /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */ #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ /* CoreSight PMU AUX buffer formats */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ -#define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#define PERF_FLAG_FD_OUTPUT (1UL << 1) -#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ #if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_op:5, /* type of opcode */ - mem_lvl:14, /* memory hierarchy level */ - mem_snoop:5, /* snoop mode */ - mem_lock:2, /* lock instr */ - mem_dtlb:7, /* tlb access */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_remote:1, /* remote */ - mem_snoopx:2, /* snoop mode, ext */ - mem_blk:3, /* access blocked */ - mem_hops:3, /* hop level */ - mem_rsvd:18; + __u64 mem_op : 5, /* Type of opcode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_snoop : 5, /* Snoop mode */ + mem_lock : 2, /* Lock instr */ + mem_dtlb : 7, /* TLB access */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_remote : 1, /* Remote */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_blk : 3, /* Access blocked */ + mem_hops : 3, /* Hop level */ + mem_rsvd : 18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:18, - mem_hops:3, /* hop level */ - mem_blk:3, /* access blocked */ - mem_snoopx:2, /* snoop mode, ext */ - mem_remote:1, /* remote */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_dtlb:7, /* tlb access */ - mem_lock:2, /* lock instr */ - mem_snoop:5, /* snoop mode */ - mem_lvl:14, /* memory hierarchy level */ - mem_op:5; /* type of opcode */ + __u64 mem_rsvd : 18, + mem_hops : 3, /* Hop level */ + mem_blk : 3, /* Access blocked */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_remote : 1, /* Remote */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_dtlb : 7, /* TLB access */ + mem_lock : 2, /* Lock instr */ + mem_snoop : 5, /* Snoop mode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_op : 5; /* Type of opcode */ }; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif -/* type of opcode (load/store/prefetch,code) */ -#define PERF_MEM_OP_NA 0x01 /* not available */ -#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ -#define PERF_MEM_OP_STORE 0x04 /* store instruction */ -#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ -#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ -#define PERF_MEM_OP_SHIFT 0 +/* Type of memory opcode: */ +#define PERF_MEM_OP_NA 0x0001 /* Not available */ +#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */ +#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */ +#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */ +#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */ +#define PERF_MEM_OP_SHIFT 0 /* - * PERF_MEM_LVL_* namespace being depricated to some extent in the + * The PERF_MEM_LVL_* namespace is being deprecated to some extent in * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. - * Supporting this namespace inorder to not break defined ABIs. + * We support this namespace in order to not break defined ABIs. * - * memory hierarchy (memory level, hit or miss) + * Memory hierarchy (memory level, hit or miss) */ -#define PERF_MEM_LVL_NA 0x01 /* not available */ -#define PERF_MEM_LVL_HIT 0x02 /* hit level */ -#define PERF_MEM_LVL_MISS 0x04 /* miss level */ -#define PERF_MEM_LVL_L1 0x08 /* L1 */ -#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ -#define PERF_MEM_LVL_L2 0x20 /* L2 */ -#define PERF_MEM_LVL_L3 0x40 /* L3 */ -#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ -#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ -#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ -#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ -#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ -#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ -#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ -#define PERF_MEM_LVL_SHIFT 5 - -#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ -#define PERF_MEM_REMOTE_SHIFT 37 - -#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ -#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ -#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ -#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ -/* 0x7 available */ -#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ -#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ -#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ -#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ -#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ - -#define PERF_MEM_LVLNUM_SHIFT 33 - -/* snoop mode */ -#define PERF_MEM_SNOOP_NA 0x01 /* not available */ -#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ -#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ -#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ -#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ -#define PERF_MEM_SNOOP_SHIFT 19 - -#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ -#define PERF_MEM_SNOOPX_SHIFT 38 - -/* locked instruction */ -#define PERF_MEM_LOCK_NA 0x01 /* not available */ -#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ -#define PERF_MEM_LOCK_SHIFT 24 +#define PERF_MEM_LVL_NA 0x0001 /* Not available */ +#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */ +#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */ +#define PERF_MEM_LVL_L1 0x0008 /* L1 */ +#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x0020 /* L2 */ +#define PERF_MEM_LVL_L3 0x0040 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ +#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ +/* 0x007 available */ +#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ +#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + +/* Snoop mode */ +#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */ +#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */ +#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */ +#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */ +#define PERF_MEM_SNOOPX_SHIFT 38 + +/* Locked instruction */ +#define PERF_MEM_LOCK_NA 0x0001 /* Not available */ +#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 /* TLB access */ -#define PERF_MEM_TLB_NA 0x01 /* not available */ -#define PERF_MEM_TLB_HIT 0x02 /* hit level */ -#define PERF_MEM_TLB_MISS 0x04 /* miss level */ -#define PERF_MEM_TLB_L1 0x08 /* L1 */ -#define PERF_MEM_TLB_L2 0x10 /* L2 */ -#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ -#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ -#define PERF_MEM_TLB_SHIFT 26 +#define PERF_MEM_TLB_NA 0x0001 /* Not available */ +#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */ +#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */ +#define PERF_MEM_TLB_L1 0x0008 /* L1 */ +#define PERF_MEM_TLB_L2 0x0010 /* L2 */ +#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 /* Access blocked */ -#define PERF_MEM_BLK_NA 0x01 /* not available */ -#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ -#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ -#define PERF_MEM_BLK_SHIFT 40 - -/* hop level */ -#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ +#define PERF_MEM_BLK_NA 0x0001 /* Not available */ +#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + +/* Hop level */ +#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */ +#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */ +#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */ +#define PERF_MEM_HOPS_3 0x0004 /* Remote board */ /* 5-7 available */ -#define PERF_MEM_HOPS_SHIFT 43 +#define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) /* - * single taken branch record layout: + * Layout of single taken branch records: * * from: source instruction (may not always be a branch insn) * to: branch target @@ -1438,37 +1449,37 @@ union perf_mem_data_src { struct perf_branch_entry { __u64 from; __u64 to; - __u64 mispred:1, /* target mispredicted */ - predicted:1,/* target predicted */ - in_tx:1, /* in transaction */ - abort:1, /* transaction abort */ - cycles:16, /* cycle count to last branch */ - type:4, /* branch type */ - spec:2, /* branch speculation info */ - new_type:4, /* additional branch type */ - priv:3, /* privilege level */ - reserved:31; + __u64 mispred : 1, /* target mispredicted */ + predicted : 1, /* target predicted */ + in_tx : 1, /* in transaction */ + abort : 1, /* transaction abort */ + cycles : 16, /* cycle count to last branch */ + type : 4, /* branch type */ + spec : 2, /* branch speculation info */ + new_type : 4, /* additional branch type */ + priv : 3, /* privilege level */ + reserved : 31; }; /* Size of used info bits in struct perf_branch_entry */ #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 union perf_sample_weight { - __u64 full; + __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) struct { - __u32 var1_dw; - __u16 var2_w; - __u16 var3_w; + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; }; #elif defined(__BIG_ENDIAN_BITFIELD) struct { - __u16 var3_w; - __u16 var2_w; - __u32 var1_dw; + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif }; diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 35791791a879..43dec6eed559 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 @@ -328,4 +331,44 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py index af7fddd7b085..cab6b576c876 100755 --- a/tools/net/ynl/pyynl/ethtool.py +++ b/tools/net/ynl/pyynl/ethtool.py @@ -338,16 +338,24 @@ def main(): print('Capabilities:') [print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])] - print(f'PTP Hardware Clock: {tsinfo["phc-index"]}') + print(f'PTP Hardware Clock: {tsinfo.get("phc-index", "none")}') - print('Hardware Transmit Timestamp Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] + if 'tx-types' in tsinfo: + print('Hardware Transmit Timestamp Modes:') + [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] + else: + print('Hardware Transmit Timestamp Modes: none') + + if 'rx-filters' in tsinfo: + print('Hardware Receive Filter Modes:') + [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] + else: + print('Hardware Receive Filter Modes: none') - print('Hardware Receive Filter Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] + if 'stats' in tsinfo and tsinfo['stats']: + print('Statistics:') + [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] - print('Statistics:') - [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] return print(f'Settings for {args.device}:') diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 30c0a34b2784..a7f08edbc235 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -1143,10 +1143,9 @@ class Family(SpecFamily): self.pure_nested_structs[nested].request = True if attr in rs_members['reply']: self.pure_nested_structs[nested].reply = True - - if spec.is_multi_val(): - child = self.pure_nested_structs.get(nested) - child.in_multi_val = True + if spec.is_multi_val(): + child = self.pure_nested_structs.get(nested) + child.in_multi_val = True self._sort_pure_types() diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 3ce7b54003c2..98c4713c1b09 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -189,6 +189,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec op2 = ins.opcode.bytes[1]; op3 = ins.opcode.bytes[2]; + /* + * XXX hack, decoder is buggered and thinks 0xea is 7 bytes long. + */ + if (op1 == 0xea) { + insn->len = 1; + insn->type = INSN_BUG; + return 0; + } + if (ins.rex_prefix.nbytes) { rex = ins.rex_prefix.bytes[0]; rex_w = X86_REX_W(rex) >> 3; @@ -842,12 +851,14 @@ int arch_decode_hint_reg(u8 sp_reg, int *base) bool arch_is_retpoline(struct symbol *sym) { - return !strncmp(sym->name, "__x86_indirect_", 15); + return !strncmp(sym->name, "__x86_indirect_", 15) || + !strncmp(sym->name, "__pi___x86_indirect_", 20); } bool arch_is_rethunk(struct symbol *sym) { - return !strcmp(sym->name, "__x86_return_thunk"); + return !strcmp(sym->name, "__x86_return_thunk") || + !strcmp(sym->name, "__pi___x86_return_thunk"); } bool arch_is_embedded_insn(struct symbol *sym) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 727a3a4fd9d7..ca5d77db692a 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -572,6 +572,34 @@ err: return -1; } +static int mark_group_syms(struct elf *elf) +{ + struct section *symtab, *sec; + struct symbol *sym; + + symtab = find_section_by_name(elf, ".symtab"); + if (!symtab) { + ERROR("no .symtab"); + return -1; + } + + list_for_each_entry(sec, &elf->sections, list) { + if (sec->sh.sh_type == SHT_GROUP && + sec->sh.sh_link == symtab->idx) { + sym = find_symbol_by_index(elf, sec->sh.sh_info); + if (!sym) { + ERROR("%s: can't find SHT_GROUP signature symbol", + sec->name); + return -1; + } + + sym->group_sec = sec; + } + } + + return 0; +} + /* * @sym's idx has changed. Update the relocs which reference it. */ @@ -745,7 +773,7 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym) /* * Move the first global symbol, as per sh_info, into a new, higher - * symbol index. This fees up a spot for a new local symbol. + * symbol index. This frees up a spot for a new local symbol. */ first_non_local = symtab->sh.sh_info; old = find_symbol_by_index(elf, first_non_local); @@ -763,6 +791,11 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym) if (elf_update_sym_relocs(elf, old)) return NULL; + if (old->group_sec) { + old->group_sec->sh.sh_info = new_idx; + mark_sec_changed(elf, old->group_sec, true); + } + new_idx = first_non_local; } @@ -1035,6 +1068,9 @@ struct elf *elf_open_read(const char *name, int flags) if (read_symbols(elf)) goto err; + if (mark_group_syms(elf)) + goto err; + if (read_relocs(elf)) goto err; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index c7c4e87ebe88..0a2fa3ac0079 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -72,6 +72,7 @@ struct symbol { u8 ignore : 1; struct list_head pv_target; struct reloc *relocs; + struct section *group_sec; }; struct reloc { diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 279ab2ab4abe..b558ab98719f 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -3,6 +3,7 @@ perf-bench-y += sched-pipe.o perf-bench-y += sched-seccomp-notify.o perf-bench-y += syscall.o perf-bench-y += mem-functions.o +perf-bench-y += futex.o perf-bench-y += futex-hash.o perf-bench-y += futex-wake.o perf-bench-y += futex-wake-parallel.o diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index b472eded521b..fdf133c9520f 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -18,9 +18,11 @@ #include <stdlib.h> #include <linux/compiler.h> #include <linux/kernel.h> +#include <linux/prctl.h> #include <linux/zalloc.h> #include <sys/time.h> #include <sys/mman.h> +#include <sys/prctl.h> #include <perf/cpumap.h> #include "../util/mutex.h" @@ -50,9 +52,12 @@ struct worker { static struct bench_futex_parameters params = { .nfutexes = 1024, .runtime = 10, + .nbuckets = -1, }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), + OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"), OPT_UINTEGER('f', "futexes", ¶ms.nfutexes, "Specify amount of futexes per threads"), @@ -118,6 +123,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), (int)bench__runtime.tv_sec); + futex_print_nbuckets(¶ms); } int bench_futex_hash(int argc, const char **argv) @@ -161,6 +167,7 @@ int bench_futex_hash(int argc, const char **argv) if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; + futex_set_nbuckets_param(¶ms); printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 0416120c091b..5144a158512c 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -41,10 +41,13 @@ static struct stats throughput_stats; static struct cond thread_parent, thread_worker; static struct bench_futex_parameters params = { + .nbuckets = -1, .runtime = 10, }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), + OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"), OPT_BOOLEAN( 'M', "multi", ¶ms.multi, "Use multiple futexes"), @@ -67,6 +70,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), (int)bench__runtime.tv_sec); + futex_print_nbuckets(¶ms); } static void toggle_done(int sig __maybe_unused, @@ -203,6 +207,7 @@ int bench_futex_lock_pi(int argc, const char **argv) mutex_init(&thread_lock); cond_init(&thread_parent); cond_init(&thread_worker); + futex_set_nbuckets_param(¶ms); threads_starting = params.nthreads; gettimeofday(&bench__start, NULL); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index aad5bfc4fe18..a2f91ee1950b 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -42,6 +42,7 @@ static unsigned int threads_starting; static int futex_flag = 0; static struct bench_futex_parameters params = { + .nbuckets = -1, /* * How many tasks to requeue at a time. * Default to 1 in order to make the kernel work more. @@ -50,6 +51,8 @@ static struct bench_futex_parameters params = { }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), + OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('q', "nrequeue", ¶ms.nrequeue, "Specify amount of threads to requeue at once"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), @@ -77,6 +80,7 @@ static void print_summary(void) params.nthreads, requeuetime_avg / USEC_PER_MSEC, rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); + futex_print_nbuckets(¶ms); } static void *workerfn(void *arg __maybe_unused) @@ -204,6 +208,8 @@ int bench_futex_requeue(int argc, const char **argv) if (params.broadcast) params.nrequeue = params.nthreads; + futex_set_nbuckets_param(¶ms); + printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), " "%d at a time.\n\n", getpid(), params.nthreads, params.fshared ? "shared":"private", &futex1, diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 4352e318631e..ee66482c29fd 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -57,9 +57,13 @@ static struct stats waketime_stats, wakeup_stats; static unsigned int threads_starting; static int futex_flag = 0; -static struct bench_futex_parameters params; +static struct bench_futex_parameters params = { + .nbuckets = -1, +}; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), + OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakers", ¶ms.nwakes, "Specify amount of waking threads"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), @@ -218,6 +222,7 @@ static void print_summary(void) params.nthreads, waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); + futex_print_nbuckets(¶ms); } @@ -291,6 +296,8 @@ int bench_futex_wake_parallel(int argc, const char **argv) if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; + futex_set_nbuckets_param(¶ms); + printf("Run summary [PID %d]: blocking on %d threads (at [%s] " "futex %p), %d threads waking up %d at a time.\n\n", getpid(), params.nthreads, params.fshared ? "shared":"private", diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 49b3c89b0b35..8d6107f7cd94 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -42,6 +42,7 @@ static unsigned int threads_starting; static int futex_flag = 0; static struct bench_futex_parameters params = { + .nbuckets = -1, /* * How many wakeups to do at a time. * Default to 1 in order to make the kernel work more. @@ -50,6 +51,8 @@ static struct bench_futex_parameters params = { }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), + OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakes", ¶ms.nwakes, "Specify amount of threads to wake at once"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), @@ -93,6 +96,7 @@ static void print_summary(void) params.nthreads, waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); + futex_print_nbuckets(¶ms); } static void block_threads(pthread_t *w, struct perf_cpu_map *cpu) diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c new file mode 100644 index 000000000000..26382e4d8d4c --- /dev/null +++ b/tools/perf/bench/futex.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <linux/prctl.h> +#include <sys/prctl.h> + +#include "futex.h" + +void futex_set_nbuckets_param(struct bench_futex_parameters *params) +{ + unsigned long flags; + int ret; + + if (params->nbuckets < 0) + return; + + flags = params->buckets_immutable ? FH_FLAG_IMMUTABLE : 0; + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, flags); + if (ret) { + printf("Requesting %d hash buckets failed: %d/%m\n", + params->nbuckets, ret); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } +} + +void futex_print_nbuckets(struct bench_futex_parameters *params) +{ + char *futex_hash_mode; + int ret; + + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); + if (params->nbuckets >= 0) { + if (ret != params->nbuckets) { + if (ret < 0) { + printf("Can't query number of buckets: %m\n"); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } + printf("Requested number of hash buckets does not currently used.\n"); + printf("Requested: %d in usage: %d\n", params->nbuckets, ret); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } + if (params->nbuckets == 0) { + ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); + } else { + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE); + if (ret < 0) { + printf("Can't check if the hash is immutable: %m\n"); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } + ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets %s", + params->nbuckets, + ret == 1 ? "(immutable)" : ""); + } + } else { + if (ret <= 0) { + ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); + } else { + ret = asprintf(&futex_hash_mode, "Futex hashing: auto resized to %d buckets", + ret); + } + } + if (ret < 0) + err(EXIT_FAILURE, "ENOMEM, futex_hash_mode"); + printf("%s\n", futex_hash_mode); + free(futex_hash_mode); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index ebdc2b032afc..9c9a73f9d865 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -25,6 +25,8 @@ struct bench_futex_parameters { unsigned int nfutexes; unsigned int nwakes; unsigned int nrequeue; + int nbuckets; + bool buckets_immutable; }; /** @@ -143,4 +145,7 @@ futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, val, opflags); } +void futex_set_nbuckets_param(struct bench_futex_parameters *params); +void futex_print_nbuckets(struct bench_futex_parameters *params); + #endif /* _FUTEX_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 857f6646cc23..e9fab20e9330 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -186,7 +186,7 @@ done # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' -check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' +check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index 9d0ce88e90e4..456ce64ad822 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -9,7 +9,7 @@ #include <inttypes.h> #include <linux/string.h> -#include "../../arch/x86/include/asm/amd-ibs.h" +#include "../../arch/x86/include/asm/amd/ibs.h" #include "debug.h" #include "session.h" diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c index 35ea65c54ffa..827507844e8f 100644 --- a/tools/testing/crypto/chacha20-s390/test-cipher.c +++ b/tools/testing/crypto/chacha20-s390/test-cipher.c @@ -50,7 +50,7 @@ struct skcipher_def { /* Perform cipher operations with the chacha lib */ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) { - u32 chacha_state[CHACHA_STATE_WORDS]; + struct chacha_state chacha_state; u8 iv[16], key[32]; u64 start, end; @@ -66,10 +66,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) } /* Encrypt */ - chacha_init(chacha_state, (u32 *)key, iv); + chacha_init(&chacha_state, (u32 *)key, iv); start = ktime_get_ns(); - chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20); + chacha_crypt_arch(&chacha_state, cipher, plain, data_size, 20); end = ktime_get_ns(); @@ -81,10 +81,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) pr_info("lib encryption took: %lld nsec", end - start); /* Decrypt */ - chacha_init(chacha_state, (u32 *)key, iv); + chacha_init(&chacha_state, (u32 *)key, iv); start = ktime_get_ns(); - chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20); + chacha_crypt_arch(&chacha_state, revert, cipher, data_size, 20); end = ktime_get_ns(); if (debug) diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index 422e186cf3cf..e70c502a16df 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -10,6 +10,7 @@ CONFIG_KUNIT_EXAMPLE_TEST=y CONFIG_KUNIT_ALL_TESTS=y CONFIG_FORTIFY_SOURCE=y +CONFIG_INIT_STACK_ALL_PATTERN=y CONFIG_IIO=y diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py index 10ff65689dd8..80fa4e354a17 100644 --- a/tools/testing/kunit/kunit_json.py +++ b/tools/testing/kunit/kunit_json.py @@ -39,10 +39,20 @@ def _get_group_json(test: Test, common_fields: JsonObj) -> JsonObj: status = _status_map.get(subtest.status, "FAIL") test_cases.append({"name": subtest.name, "status": status}) + test_counts = test.counts + counts_json = { + "tests": test_counts.total(), + "passed": test_counts.passed, + "failed": test_counts.failed, + "crashed": test_counts.crashed, + "skipped": test_counts.skipped, + "errors": test_counts.errors, + } test_group = { "name": test.name, "sub_groups": sub_groups, "test_cases": test_cases, + "misc": counts_json } test_group.update(common_fields) return test_group diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index d3f39bc1ceec..260d8d9aa1db 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -14,6 +14,7 @@ import os import shlex import shutil import signal +import sys import threading from typing import Iterator, List, Optional, Tuple from types import FrameType @@ -201,6 +202,13 @@ def _default_qemu_config_path(arch: str) -> str: return config_path options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')] + + if arch == 'help': + print('um') + for option in options: + print(option) + sys.exit() + raise ConfigError(arch + ' is not a valid arch, options are ' + str(sorted(options))) def _get_qemu_ops(config_path: str, diff --git a/tools/testing/kunit/qemu_configs/powerpc.py b/tools/testing/kunit/qemu_configs/powerpc.py index 7ec38d4131f7..5b4c895d5d5a 100644 --- a/tools/testing/kunit/qemu_configs/powerpc.py +++ b/tools/testing/kunit/qemu_configs/powerpc.py @@ -3,6 +3,7 @@ from ..qemu_config import QemuArchParams QEMU_ARCH = QemuArchParams(linux_arch='powerpc', kconfig=''' CONFIG_PPC64=y +CONFIG_CPU_BIG_ENDIAN=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_HVC_CONSOLE=y''', diff --git a/tools/testing/kunit/qemu_configs/powerpc32.py b/tools/testing/kunit/qemu_configs/powerpc32.py new file mode 100644 index 000000000000..88bd60dbb948 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/powerpc32.py @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='powerpc', + kconfig=''' +CONFIG_PPC32=y +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_ADB_CUDA=y +CONFIG_SERIAL_PMACZILOG=y +CONFIG_SERIAL_PMACZILOG_TTYS=y +CONFIG_SERIAL_PMACZILOG_CONSOLE=y +''', + qemu_arch='ppc', + kernel_path='vmlinux', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'g3beige', '-cpu', 'max']) diff --git a/tools/testing/kunit/qemu_configs/powerpcle.py b/tools/testing/kunit/qemu_configs/powerpcle.py new file mode 100644 index 000000000000..7ddee8af4bd7 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/powerpcle.py @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='powerpc', + kconfig=''' +CONFIG_PPC64=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_HVC_CONSOLE=y +''', + qemu_arch='ppc64', + kernel_path='vmlinux', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'pseries', '-cpu', 'power8']) diff --git a/tools/testing/kunit/qemu_configs/riscv32.py b/tools/testing/kunit/qemu_configs/riscv32.py new file mode 100644 index 000000000000..b79ba0ae30f8 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/riscv32.py @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='riscv', + kconfig=''' +CONFIG_NONPORTABLE=y +CONFIG_ARCH_RV32I=y +CONFIG_ARCH_VIRT=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +''', + qemu_arch='riscv32', + kernel_path='arch/riscv/boot/Image', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-machine', 'virt']) diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py index 256d9573b446..2019550a1b69 100644 --- a/tools/testing/kunit/qemu_configs/sparc.py +++ b/tools/testing/kunit/qemu_configs/sparc.py @@ -2,6 +2,8 @@ from ..qemu_config import QemuArchParams QEMU_ARCH = QemuArchParams(linux_arch='sparc', kconfig=''' +CONFIG_KUNIT_FAULT_TEST=n +CONFIG_SPARC32=y CONFIG_SERIAL_SUNZILOG=y CONFIG_SERIAL_SUNZILOG_CONSOLE=y ''', diff --git a/tools/testing/kunit/qemu_configs/sparc64.py b/tools/testing/kunit/qemu_configs/sparc64.py new file mode 100644 index 000000000000..53d4e5a8c972 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/sparc64.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='sparc', + kconfig=''' +CONFIG_64BIT=y +CONFIG_SPARC64=y +CONFIG_PCI=y +CONFIG_SERIAL_SUNSU=y +CONFIG_SERIAL_SUNSU_CONSOLE=y +''', + qemu_arch='sparc64', + kernel_path='arch/sparc/boot/image', + kernel_command_line='console=ttyS0 kunit_shutdown=poweroff', + extra_qemu_params=[]) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index c77c8c8e3d9b..80fb84fa3cfc 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -121,6 +121,7 @@ TARGETS += user_events TARGETS += vDSO TARGETS += mm TARGETS += x86 +TARGETS += x86/bugs TARGETS += zram #Please keep the TARGETS list alphabetically sorted # Run "make quicktest=1 run_tests" or diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 1bd403a5ef7b..0fd8c9b0d38f 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -526,6 +526,12 @@ extern const struct bench bench_trig_uprobe_multi_push; extern const struct bench bench_trig_uretprobe_multi_push; extern const struct bench bench_trig_uprobe_multi_ret; extern const struct bench bench_trig_uretprobe_multi_ret; +#ifdef __x86_64__ +extern const struct bench bench_trig_uprobe_nop5; +extern const struct bench bench_trig_uretprobe_nop5; +extern const struct bench bench_trig_uprobe_multi_nop5; +extern const struct bench bench_trig_uretprobe_multi_nop5; +#endif extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; @@ -586,6 +592,12 @@ static const struct bench *benchs[] = { &bench_trig_uretprobe_multi_push, &bench_trig_uprobe_multi_ret, &bench_trig_uretprobe_multi_ret, +#ifdef __x86_64__ + &bench_trig_uprobe_nop5, + &bench_trig_uretprobe_nop5, + &bench_trig_uprobe_multi_nop5, + &bench_trig_uretprobe_multi_nop5, +#endif /* ringbuf/perfbuf benchmarks */ &bench_rb_libbpf, &bench_rb_custom, diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 32e9f194d449..82327657846e 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -333,6 +333,20 @@ static void *uprobe_producer_ret(void *input) return NULL; } +#ifdef __x86_64__ +__nocf_check __weak void uprobe_target_nop5(void) +{ + asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00"); +} + +static void *uprobe_producer_nop5(void *input) +{ + while (true) + uprobe_target_nop5(); + return NULL; +} +#endif + static void usetup(bool use_retprobe, bool use_multi, void *target_addr) { size_t uprobe_offset; @@ -448,6 +462,28 @@ static void uretprobe_multi_ret_setup(void) usetup(true, true /* use_multi */, &uprobe_target_ret); } +#ifdef __x86_64__ +static void uprobe_nop5_setup(void) +{ + usetup(false, false /* !use_multi */, &uprobe_target_nop5); +} + +static void uretprobe_nop5_setup(void) +{ + usetup(true, false /* !use_multi */, &uprobe_target_nop5); +} + +static void uprobe_multi_nop5_setup(void) +{ + usetup(false, true /* use_multi */, &uprobe_target_nop5); +} + +static void uretprobe_multi_nop5_setup(void) +{ + usetup(true, true /* use_multi */, &uprobe_target_nop5); +} +#endif + const struct bench bench_trig_syscall_count = { .name = "trig-syscall-count", .validate = trigger_validate, @@ -506,3 +542,9 @@ BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret"); BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop"); BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push"); BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret"); +#ifdef __x86_64__ +BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5"); +BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5"); +BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5"); +BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5"); +#endif diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh index af169f831f2f..03f55405484b 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh @@ -2,7 +2,7 @@ set -eufo pipefail -for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret} +for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5} do summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) printf "%-15s: %s\n" $i "$summary" diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 3720b7611523..e1495a4bbc99 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -158,7 +158,6 @@ CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TUN=y CONFIG_UNIX=y CONFIG_UPROBES=y -CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y CONFIG_VLAN_8021Q=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 706931a8c2c6..26c3bc2ce11d 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -128,7 +128,6 @@ CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TUN=y CONFIG_UNIX=y CONFIG_UPROBES=y -CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y CONFIG_VLAN_8021Q=y diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 137b2364a082..9984413be9f0 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -1,14 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 #include <fcntl.h> +#include <inttypes.h> #include <libgen.h> #include <linux/limits.h> #include <pthread.h> #include <string.h> +#include <sys/mount.h> #include <sys/resource.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> #include <unistd.h> #include "../kselftest_harness.h" +#include "../pidfd/pidfd.h" #define STACKDUMP_FILE "stack_values" #define STACKDUMP_SCRIPT "stackdump" @@ -35,6 +41,7 @@ static void crashing_child(void) FIXTURE(coredump) { char original_core_pattern[256]; + pid_t pid_coredump_server; }; FIXTURE_SETUP(coredump) @@ -44,6 +51,7 @@ FIXTURE_SETUP(coredump) char *dir; int ret; + self->pid_coredump_server = -ESRCH; file = fopen("/proc/sys/kernel/core_pattern", "r"); ASSERT_NE(NULL, file); @@ -61,10 +69,17 @@ FIXTURE_TEARDOWN(coredump) { const char *reason; FILE *file; - int ret; + int ret, status; unlink(STACKDUMP_FILE); + if (self->pid_coredump_server > 0) { + kill(self->pid_coredump_server, SIGTERM); + waitpid(self->pid_coredump_server, &status, 0); + } + unlink("/tmp/coredump.file"); + unlink("/tmp/coredump.socket"); + file = fopen("/proc/sys/kernel/core_pattern", "w"); if (!file) { reason = "Unable to open core_pattern"; @@ -89,14 +104,14 @@ fail: fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason); } -TEST_F(coredump, stackdump) +TEST_F_TIMEOUT(coredump, stackdump, 120) { struct sigaction action = {}; unsigned long long stack; char *test_dir, *line; size_t line_length; char buf[PATH_MAX]; - int ret, i; + int ret, i, status; FILE *file; pid_t pid; @@ -129,6 +144,10 @@ TEST_F(coredump, stackdump) /* * Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file */ + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + for (i = 0; i < 10; ++i) { file = fopen(STACKDUMP_FILE, "r"); if (file) @@ -138,14 +157,466 @@ TEST_F(coredump, stackdump) ASSERT_NE(file, NULL); /* Step 4: Make sure all stack pointer values are non-zero */ + line = NULL; for (i = 0; -1 != getline(&line, &line_length, file); ++i) { stack = strtoull(line, NULL, 10); ASSERT_NE(stack, 0); } + free(line); ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN); fclose(file); } +TEST_F(coredump, socket) +{ + int fd, pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + struct stat st; + char core_file[PATH_MAX]; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + ret = listen(fd_server, 1); + if (ret < 0) { + fprintf(stderr, "Failed to listen on coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + fprintf(stderr, "Failed to accept coredump socket connection\n"); + close(fd_server); + _exit(EXIT_FAILURE); + } + + fd_peer_pidfd_len = sizeof(fd_peer_pidfd); + ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, + &fd_peer_pidfd, &fd_peer_pidfd_len); + if (ret < 0) { + fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + _exit(EXIT_FAILURE); + } + + memset(&info, 0, sizeof(info)); + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); + if (ret < 0) { + fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.mask & PIDFD_INFO_COREDUMP)) { + fprintf(stderr, "Missing coredump information from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) { + fprintf(stderr, "Received connection from non-coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + fd_core_file = creat("/tmp/coredump.file", 0644); + if (fd_core_file < 0) { + fprintf(stderr, "Failed to create coredump file\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + for (;;) { + char buffer[4096]; + ssize_t bytes_read, bytes_write; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) { + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_FAILURE); + } + + if (bytes_read == 0) + break; + + bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_read != bytes_write) { + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_FAILURE); + } + } + + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + ASSERT_EQ(stat("/tmp/coredump.file", &st), 0); + ASSERT_GT(st.st_size, 0); + /* + * We should somehow validate the produced core file. + * For now just allow for visual inspection + */ + system("file /tmp/coredump.file"); +} + +TEST_F(coredump, socket_detect_userspace_client) +{ + int fd, pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + struct stat st; + char core_file[PATH_MAX]; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + ret = listen(fd_server, 1); + if (ret < 0) { + fprintf(stderr, "Failed to listen on coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + fprintf(stderr, "Failed to accept coredump socket connection\n"); + close(fd_server); + _exit(EXIT_FAILURE); + } + + fd_peer_pidfd_len = sizeof(fd_peer_pidfd); + ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, + &fd_peer_pidfd, &fd_peer_pidfd_len); + if (ret < 0) { + fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + _exit(EXIT_FAILURE); + } + + memset(&info, 0, sizeof(info)); + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); + if (ret < 0) { + fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.mask & PIDFD_INFO_COREDUMP)) { + fprintf(stderr, "Missing coredump information from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (info.coredump_mask & PIDFD_COREDUMPED) { + fprintf(stderr, "Received unexpected connection from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) { + int fd_socket; + ssize_t ret; + + fd_socket = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd_socket < 0) + _exit(EXIT_FAILURE); + + + ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) + _exit(EXIT_FAILURE); + + (void *)write(fd_socket, &(char){ 0 }, 1); + close(fd_socket); + _exit(EXIT_SUCCESS); + } + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + ASSERT_NE(stat("/tmp/coredump.file", &st), 0); + ASSERT_EQ(errno, ENOENT); +} + +TEST_F(coredump, socket_enoent) +{ + int pidfd, ret, status; + FILE *file; + pid_t pid; + char core_file[PATH_MAX]; + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); +} + +TEST_F(coredump, socket_no_listener) +{ + int pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh index e350c521b467..9927b654fb8f 100755 --- a/tools/testing/selftests/cpufreq/cpufreq.sh +++ b/tools/testing/selftests/cpufreq/cpufreq.sh @@ -52,7 +52,14 @@ read_cpufreq_files_in_dir() for file in $files; do if [ -f $1/$file ]; then printf "$file:" - cat $1/$file + #file is readable ? + local rfile=$(ls -l $1/$file | awk '$1 ~ /^.*r.*/ { print $NF; }') + + if [ ! -z $rfile ]; then + cat $1/$file + else + printf "$file is not readable\n" + fi else printf "\n" read_cpufreq_files_in_dir "$1/$file" @@ -83,10 +90,10 @@ update_cpufreq_files_in_dir() for file in $files; do if [ -f $1/$file ]; then - # file is writable ? - local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }') + # file is readable and writable ? + local rwfile=$(ls -l $1/$file | awk '$1 ~ /^.*rw.*/ { print $NF; }') - if [ ! -z $wfile ]; then + if [ ! -z $rwfile ]; then # scaling_setspeed is a special file and we # should skip updating it if [ $file != "scaling_setspeed" ]; then @@ -244,9 +251,10 @@ do_suspend() printf "Failed to suspend using RTC wake alarm\n" return 1 fi + else + echo $filename > $SYSFS/power/state fi - echo $filename > $SYSFS/power/state printf "Came out of $1\n" printf "Do basic tests after finishing $1 to verify cpufreq state\n\n" diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 2bf14ac2b8c6..9d48004ff1a1 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -431,6 +431,22 @@ static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) return 0; } +static struct netdev_queue_id *create_queues(void) +{ + struct netdev_queue_id *queues; + size_t i = 0; + + queues = calloc(num_queues, sizeof(*queues)); + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + return queues; +} + int do_server(struct memory_buffer *mem) { char ctrl_data[sizeof(int) * 20000]; @@ -448,7 +464,6 @@ int do_server(struct memory_buffer *mem) char buffer[256]; int socket_fd; int client_fd; - size_t i = 0; int ret; ret = parse_address(server_ip, atoi(port), &server_sin); @@ -471,16 +486,7 @@ int do_server(struct memory_buffer *mem) sleep(1); - queues = malloc(sizeof(*queues) * num_queues); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); tmp_mem = malloc(mem->size); @@ -545,7 +551,6 @@ int do_server(struct memory_buffer *mem) goto cleanup; } - i++; for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_SOCKET || (cm->cmsg_type != SCM_DEVMEM_DMABUF && @@ -630,10 +635,8 @@ cleanup: void run_devmem_tests(void) { - struct netdev_queue_id *queues; struct memory_buffer *mem; struct ynl_sock *ys; - size_t i = 0; mem = provider->alloc(getpagesize() * NUM_PAGES); @@ -641,38 +644,24 @@ void run_devmem_tests(void) if (configure_rss()) error(1, 0, "rss error\n"); - queues = calloc(num_queues, sizeof(*queues)); - if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, + calloc(num_queues, sizeof(struct netdev_queue_id)), + num_queues, &ys)) error(1, 0, "Binding empty queues array should have failed\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - if (configure_headersplit(0)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Configure dmabuf with header split off should have failed\n"); if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); /* Deactivating a bound queue should not be legal */ diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index 828b66a10c63..7afa58e2bb20 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -2,3 +2,4 @@ dnotify_test devpts_pts file_stressor +anon_inode_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 66305fc34c60..b02326193fee 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts file_stressor +TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c new file mode 100644 index 000000000000..e8e0ef1460d2 --- /dev/null +++ b/tools/testing/selftests/filesystems/anon_inode_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ + +#include <fcntl.h> +#include <stdio.h> +#include <sys/stat.h> + +#include "../kselftest_harness.h" +#include "overlayfs/wrappers.h" + +TEST(anon_inode_no_chown) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(fchown(fd_context, 1234, 5678), 0); + ASSERT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_chmod) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(fchmod(fd_context, 0777), 0); + ASSERT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_exec) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(execveat(fd_context, "", NULL, NULL, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, EACCES); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_open) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_GE(dup2(fd_context, 500), 0); + ASSERT_EQ(close(fd_context), 0); + fd_context = 500; + + ASSERT_LT(open("/proc/self/fd/500", 0), 0); + ASSERT_EQ(errno, ENXIO); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST_HARNESS_MAIN + diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore index 82a4846cbc4b..124339ea7845 100644 --- a/tools/testing/selftests/filesystems/mount-notify/.gitignore +++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only /*_test +/*_test_ns diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile index 10be0227b5ae..836a4eb7be06 100644 --- a/tools/testing/selftests/filesystems/mount-notify/Makefile +++ b/tools/testing/selftests/filesystems/mount-notify/Makefile @@ -1,6 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) -TEST_GEN_PROGS := mount-notify_test +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS += -lcap + +TEST_GEN_PROGS := mount-notify_test mount-notify_test_ns include ../../lib.mk + +$(OUTPUT)/mount-notify_test: ../utils.c +$(OUTPUT)/mount-notify_test_ns: ../utils.c diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 59a71f22fb11..63ce708d93ed 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -8,43 +8,21 @@ #include <string.h> #include <sys/stat.h> #include <sys/mount.h> -#include <linux/fanotify.h> #include <unistd.h> -#include <sys/fanotify.h> #include <sys/syscall.h> #include "../../kselftest_harness.h" #include "../statmount/statmount.h" +#include "../utils.h" -#ifndef FAN_MNT_ATTACH -struct fanotify_event_info_mnt { - struct fanotify_event_info_header hdr; - __u64 mnt_id; -}; -#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ -#endif - -#ifndef FAN_MNT_DETACH -#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ -#endif - -#ifndef FAN_REPORT_MNT -#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ +// Needed for linux/fanotify.h +#ifndef __kernel_fsid_t +typedef struct { + int val[2]; +} __kernel_fsid_t; #endif -#ifndef FAN_MARK_MNTNS -#define FAN_MARK_MNTNS 0x00000110 -#endif - -static uint64_t get_mnt_id(struct __test_metadata *const _metadata, - const char *path) -{ - struct statx sx; - - ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0); - ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE)); - return sx.stx_mnt_id; -} +#include <sys/fanotify.h> static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; @@ -94,7 +72,7 @@ FIXTURE_SETUP(fanotify) ASSERT_EQ(mkdir("b", 0700), 0); - self->root_id = get_mnt_id(_metadata, "/"); + self->root_id = get_unique_mnt_id("/"); ASSERT_NE(self->root_id, 0); for (i = 0; i < NUM_FAN_FDS; i++) { diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c new file mode 100644 index 000000000000..090a5ca65004 --- /dev/null +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c @@ -0,0 +1,557 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> +#include <sys/syscall.h> + +#include "../../kselftest_harness.h" +#include "../../pidfd/pidfd.h" +#include "../statmount/statmount.h" +#include "../utils.h" + +// Needed for linux/fanotify.h +#ifndef __kernel_fsid_t +typedef struct { + int val[2]; +} __kernel_fsid_t; +#endif + +#include <sys/fanotify.h> + +static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; + +static const int mark_types[] = { + FAN_MARK_FILESYSTEM, + FAN_MARK_MOUNT, + FAN_MARK_INODE +}; + +static const int mark_cmds[] = { + FAN_MARK_ADD, + FAN_MARK_REMOVE, + FAN_MARK_FLUSH +}; + +#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) + +FIXTURE(fanotify) { + int fan_fd[NUM_FAN_FDS]; + char buf[256]; + unsigned int rem; + void *next; + char root_mntpoint[sizeof(root_mntpoint_templ)]; + int orig_root; + int orig_ns_fd; + int ns_fd; + uint64_t root_id; +}; + +FIXTURE_SETUP(fanotify) +{ + int i, ret; + + self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(self->orig_ns_fd, 0); + + ret = setup_userns(); + ASSERT_EQ(ret, 0); + + self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(self->ns_fd, 0); + + strcpy(self->root_mntpoint, root_mntpoint_templ); + ASSERT_NE(mkdtemp(self->root_mntpoint), NULL); + + self->orig_root = open("/", O_PATH | O_CLOEXEC); + ASSERT_GE(self->orig_root, 0); + + ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0); + + ASSERT_EQ(chroot(self->root_mntpoint), 0); + + ASSERT_EQ(chdir("/"), 0); + + ASSERT_EQ(mkdir("a", 0700), 0); + + ASSERT_EQ(mkdir("b", 0700), 0); + + self->root_id = get_unique_mnt_id("/"); + ASSERT_NE(self->root_id, 0); + + for (i = 0; i < NUM_FAN_FDS; i++) { + int fan_fd = fanotify_init(FAN_REPORT_FID, 0); + // Verify that watching tmpfs mounted inside userns is allowed + ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i], + FAN_OPEN, AT_FDCWD, "/"); + ASSERT_EQ(ret, 0); + // ...but watching entire orig root filesystem is not allowed + ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM, + FAN_OPEN, self->orig_root, "."); + ASSERT_NE(ret, 0); + close(fan_fd); + + self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, + 0); + ASSERT_GE(self->fan_fd[i], 0); + // Verify that watching mntns where group was created is allowed + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + // ...but watching orig mntns is not allowed + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->orig_ns_fd, NULL); + ASSERT_NE(ret, 0); + // On fd[0] we do an extra ADD that changes nothing. + // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. + ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + } + + self->rem = 0; +} + +FIXTURE_TEARDOWN(fanotify) +{ + int i; + + ASSERT_EQ(self->rem, 0); + for (i = 0; i < NUM_FAN_FDS; i++) + close(self->fan_fd[i]); + + ASSERT_EQ(fchdir(self->orig_root), 0); + + ASSERT_EQ(chroot("."), 0); + + EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0); + EXPECT_EQ(chdir(self->root_mntpoint), 0); + EXPECT_EQ(chdir("/"), 0); + EXPECT_EQ(rmdir(self->root_mntpoint), 0); +} + +static uint64_t expect_notify(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t *mask) +{ + struct fanotify_event_metadata *meta; + struct fanotify_event_info_mnt *mnt; + unsigned int thislen; + + if (!self->rem) { + ssize_t len; + int i; + + for (i = NUM_FAN_FDS - 1; i >= 0; i--) { + len = read(self->fan_fd[i], self->buf, + sizeof(self->buf)); + if (i > 0) { + // Groups 1,2 should get EAGAIN + ASSERT_EQ(len, -1); + ASSERT_EQ(errno, EAGAIN); + } else { + // Group 0 should get events + ASSERT_GT(len, 0); + } + } + + self->rem = len; + self->next = (void *) self->buf; + } + + meta = self->next; + ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem)); + + thislen = meta->event_len; + self->rem -= thislen; + self->next += thislen; + + *mask = meta->mask; + thislen -= sizeof(*meta); + + mnt = ((void *) meta) + meta->event_len - thislen; + + ASSERT_EQ(thislen, sizeof(*mnt)); + + return mnt->mnt_id; +} + +static void expect_notify_n(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + unsigned int n, uint64_t mask[], uint64_t mnts[]) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mnts[i] = expect_notify(_metadata, self, &mask[i]); +} + +static uint64_t expect_notify_mask(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t expect_mask) +{ + uint64_t mntid, mask; + + mntid = expect_notify(_metadata, self, &mask); + ASSERT_EQ(expect_mask, mask); + + return mntid; +} + + +static void expect_notify_mask_n(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t mask, unsigned int n, uint64_t mnts[]) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mnts[i] = expect_notify_mask(_metadata, self, mask); +} + +static void verify_mount_ids(struct __test_metadata *const _metadata, + const uint64_t list1[], const uint64_t list2[], + size_t num) +{ + unsigned int i, j; + + // Check that neither list has any duplicates + for (i = 0; i < num; i++) { + for (j = 0; j < num; j++) { + if (i != j) { + ASSERT_NE(list1[i], list1[j]); + ASSERT_NE(list2[i], list2[j]); + } + } + } + // Check that all list1 memebers can be found in list2. Together with + // the above it means that the list1 and list2 represent the same sets. + for (i = 0; i < num; i++) { + for (j = 0; j < num; j++) { + if (list1[i] == list2[j]) + break; + } + ASSERT_NE(j, num); + } +} + +static void check_mounted(struct __test_metadata *const _metadata, + const uint64_t mnts[], size_t num) +{ + ssize_t ret; + uint64_t *list; + + list = malloc((num + 1) * sizeof(list[0])); + ASSERT_NE(list, NULL); + + ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0); + ASSERT_EQ(ret, num); + + verify_mount_ids(_metadata, mnts, list, num); + + free(list); +} + +static void setup_mount_tree(struct __test_metadata *const _metadata, + int log2_num) +{ + int ret, i; + + ret = mount("", "/", NULL, MS_SHARED, NULL); + ASSERT_EQ(ret, 0); + + for (i = 0; i < log2_num; i++) { + ret = mount("/", "/", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + } +} + +TEST_F(fanotify, bind) +{ + int ret; + uint64_t mnts[2] = { self->root_id }; + + ret = mount("/", "/", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + // Cleanup + uint64_t detach_id; + ret = umount("/"); + ASSERT_EQ(ret, 0); + + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); + ASSERT_EQ(detach_id, mnts[1]); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, move) +{ + int ret; + uint64_t mnts[2] = { self->root_id }; + uint64_t move_id; + + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0); + ASSERT_EQ(ret, 0); + + move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH); + ASSERT_EQ(move_id, mnts[1]); + + // Cleanup + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, propagate) +{ + const unsigned int log2_num = 4; + const unsigned int num = (1 << log2_num); + uint64_t mnts[num]; + + setup_mount_tree(_metadata, log2_num); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1); + + mnts[0] = self->root_id; + check_mounted(_metadata, mnts, num); + + // Cleanup + int ret; + uint64_t mnts2[num]; + ret = umount2("/", MNT_DETACH); + ASSERT_EQ(ret, 0); + + ret = mount("", "/", NULL, MS_PRIVATE, NULL); + ASSERT_EQ(ret, 0); + + mnts2[0] = self->root_id; + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1); + verify_mount_ids(_metadata, mnts, mnts2, num); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, fsmount) +{ + int ret, fs, mnt; + uint64_t mnts[2] = { self->root_id }; + + fs = fsopen("tmpfs", 0); + ASSERT_GE(fs, 0); + + ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0); + ASSERT_EQ(ret, 0); + + mnt = fsmount(fs, 0, 0); + ASSERT_GE(mnt, 0); + + close(fs); + + ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH); + ASSERT_EQ(ret, 0); + + close(mnt); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + // Cleanup + uint64_t detach_id; + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); + ASSERT_EQ(detach_id, mnts[1]); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, reparent) +{ + uint64_t mnts[6] = { self->root_id }; + uint64_t dmnts[3]; + uint64_t masks[3]; + unsigned int i; + int ret; + + // Create setup with a[1] -> b[2] propagation + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("", "/a", NULL, MS_SHARED, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("/a", "/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("", "/b", NULL, MS_SLAVE, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); + + check_mounted(_metadata, mnts, 3); + + // Mount on a[3], which is propagated to b[4] + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3); + + check_mounted(_metadata, mnts, 5); + + // Mount on b[5], not propagated + ret = mount("/", "/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + + check_mounted(_metadata, mnts, 6); + + // Umount a[3], which is propagated to b[4], but not b[5] + // This will result in b[5] "falling" on b[2] + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + expect_notify_n(_metadata, self, 3, masks, dmnts); + verify_mount_ids(_metadata, mnts + 3, dmnts, 3); + + for (i = 0; i < 3; i++) { + if (dmnts[i] == mnts[5]) { + ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH); + } else { + ASSERT_EQ(masks[i], FAN_MNT_DETACH); + } + } + + mnts[3] = mnts[5]; + check_mounted(_metadata, mnts, 4); + + // Cleanup + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts); + verify_mount_ids(_metadata, mnts + 1, dmnts, 3); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, rmdir) +{ + uint64_t mnts[3] = { self->root_id }; + int ret; + + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("/", "/a/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); + + check_mounted(_metadata, mnts, 3); + + ret = chdir("/a"); + ASSERT_EQ(ret, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret == 0) { + chdir("/"); + unshare(CLONE_NEWNS); + mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + umount2("/a", MNT_DETACH); + // This triggers a detach in the other namespace + rmdir("/a"); + exit(0); + } + wait(NULL); + + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1); + check_mounted(_metadata, mnts, 1); + + // Cleanup + ret = chdir("/"); + ASSERT_EQ(ret, 0); +} + +TEST_F(fanotify, pivot_root) +{ + uint64_t mnts[3] = { self->root_id }; + uint64_t mnts2[3]; + int ret; + + ret = mount("tmpfs", "/a", "tmpfs", 0, NULL); + ASSERT_EQ(ret, 0); + + mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + + ret = mkdir("/a/new", 0700); + ASSERT_EQ(ret, 0); + + ret = mkdir("/a/old", 0700); + ASSERT_EQ(ret, 0); + + ret = mount("/a", "/a/new", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + check_mounted(_metadata, mnts, 3); + + ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old"); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2); + verify_mount_ids(_metadata, mnts, mnts2, 2); + check_mounted(_metadata, mnts, 3); + + // Cleanup + ret = syscall(SYS_pivot_root, "/old", "/old/a/new"); + ASSERT_EQ(ret, 0); + + ret = umount("/a/new"); + ASSERT_EQ(ret, 0); + + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + check_mounted(_metadata, mnts, 1); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile index 6c661232b3b5..d3ad4a77db9b 100644 --- a/tools/testing/selftests/filesystems/overlayfs/Makefile +++ b/tools/testing/selftests/filesystems/overlayfs/Makefile @@ -4,7 +4,7 @@ CFLAGS += -Wall CFLAGS += $(KHDR_INCLUDES) LDLIBS += -lcap -LOCAL_HDRS += wrappers.h log.h +LOCAL_HDRS += ../wrappers.h log.h TEST_GEN_PROGS := dev_in_maps TEST_GEN_PROGS += set_layers_via_fds diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c index 3b796264223f..31db54b00e64 100644 --- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c +++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c @@ -17,7 +17,7 @@ #include "../../kselftest.h" #include "log.h" -#include "wrappers.h" +#include "../wrappers.h" static long get_file_dev_and_inode(void *addr, struct statx *stx) { diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c index 5074e64e74a8..dc0449fa628f 100644 --- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c +++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c @@ -16,7 +16,7 @@ #include "../../pidfd/pidfd.h" #include "log.h" #include "../utils.h" -#include "wrappers.h" +#include "../wrappers.h" FIXTURE(set_layers_via_fds) { int pidfd; diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile index 14ee91a41650..8e354fe99b44 100644 --- a/tools/testing/selftests/filesystems/statmount/Makefile +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS += -lcap + TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test include ../../lib.mk + +$(OUTPUT)/statmount_test_ns: ../utils.c diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h index a7a5289ddae9..99e5ad082fb1 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount.h +++ b/tools/testing/selftests/filesystems/statmount/statmount.h @@ -7,6 +7,42 @@ #include <linux/mount.h> #include <asm/unistd.h> +#ifndef __NR_statmount + #if defined __alpha__ + #define __NR_statmount 567 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_statmount 4457 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_statmount 6457 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_statmount 5457 + #endif + #else + #define __NR_statmount 457 + #endif +#endif + +#ifndef __NR_listmount + #if defined __alpha__ + #define __NR_listmount 568 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_listmount 4458 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_listmount 6458 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_listmount 5458 + #endif + #else + #define __NR_listmount 458 + #endif +#endif + static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask, struct statmount *buf, size_t bufsize, unsigned int flags) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c index 70cb0c8b21cf..605a3fa16bf7 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c @@ -14,6 +14,7 @@ #include <linux/stat.h> #include "statmount.h" +#include "../utils.h" #include "../../kselftest.h" #define NSID_PASS 0 @@ -78,87 +79,10 @@ static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id) return NSID_PASS; } -static int get_mnt_id(const char *path, uint64_t *mnt_id) -{ - struct statx sx; - int ret; - - ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx); - if (ret == -1) { - ksft_print_msg("retrieving unique mount ID for %s: %s\n", path, - strerror(errno)); - return NSID_ERROR; - } - - if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) { - ksft_print_msg("no unique mount ID available for %s\n", path); - return NSID_ERROR; - } - - *mnt_id = sx.stx_mnt_id; - return NSID_PASS; -} - -static int write_file(const char *path, const char *val) -{ - int fd = open(path, O_WRONLY); - size_t len = strlen(val); - int ret; - - if (fd == -1) { - ksft_print_msg("opening %s for write: %s\n", path, strerror(errno)); - return NSID_ERROR; - } - - ret = write(fd, val, len); - if (ret == -1) { - ksft_print_msg("writing to %s: %s\n", path, strerror(errno)); - return NSID_ERROR; - } - if (ret != len) { - ksft_print_msg("short write to %s\n", path); - return NSID_ERROR; - } - - ret = close(fd); - if (ret == -1) { - ksft_print_msg("closing %s\n", path); - return NSID_ERROR; - } - - return NSID_PASS; -} - static int setup_namespace(void) { - int ret; - char buf[32]; - uid_t uid = getuid(); - gid_t gid = getgid(); - - ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); - if (ret == -1) - ksft_exit_fail_msg("unsharing mountns and userns: %s\n", - strerror(errno)); - - sprintf(buf, "0 %d 1", uid); - ret = write_file("/proc/self/uid_map", buf); - if (ret != NSID_PASS) - return ret; - ret = write_file("/proc/self/setgroups", "deny"); - if (ret != NSID_PASS) - return ret; - sprintf(buf, "0 %d 1", gid); - ret = write_file("/proc/self/gid_map", buf); - if (ret != NSID_PASS) - return ret; - - ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); - if (ret == -1) { - ksft_print_msg("making mount tree private: %s\n", - strerror(errno)); + if (setup_userns() != 0) return NSID_ERROR; - } return NSID_PASS; } @@ -174,9 +98,9 @@ static int _test_statmount_mnt_ns_id(void) if (ret != NSID_PASS) return ret; - ret = get_mnt_id("/", &root_id); - if (ret != NSID_PASS) - return ret; + root_id = get_unique_mnt_id("/"); + if (!root_id) + return NSID_ERROR; ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); if (ret == -1) { diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c index e553c89c5b19..c43a69dffd83 100644 --- a/tools/testing/selftests/filesystems/utils.c +++ b/tools/testing/selftests/filesystems/utils.c @@ -18,7 +18,10 @@ #include <sys/types.h> #include <sys/wait.h> #include <sys/xattr.h> +#include <sys/mount.h> +#include "../kselftest.h" +#include "wrappers.h" #include "utils.h" #define MAX_USERNS_LEVEL 32 @@ -447,6 +450,71 @@ out_close: return fret; } +static int write_file(const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + int ret; + + if (fd == -1) { + ksft_print_msg("opening %s for write: %s\n", path, strerror(errno)); + return -1; + } + + ret = write(fd, val, len); + if (ret == -1) { + ksft_print_msg("writing to %s: %s\n", path, strerror(errno)); + return -1; + } + if (ret != len) { + ksft_print_msg("short write to %s\n", path); + return -1; + } + + ret = close(fd); + if (ret == -1) { + ksft_print_msg("closing %s\n", path); + return -1; + } + + return 0; +} + +int setup_userns(void) +{ + int ret; + char buf[32]; + uid_t uid = getuid(); + gid_t gid = getgid(); + + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); + if (ret) { + ksft_exit_fail_msg("unsharing mountns and userns: %s\n", + strerror(errno)); + return ret; + } + + sprintf(buf, "0 %d 1", uid); + ret = write_file("/proc/self/uid_map", buf); + if (ret) + return ret; + ret = write_file("/proc/self/setgroups", "deny"); + if (ret) + return ret; + sprintf(buf, "0 %d 1", gid); + ret = write_file("/proc/self/gid_map", buf); + if (ret) + return ret; + + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + if (ret) { + ksft_print_msg("making mount tree private: %s\n", strerror(errno)); + return ret; + } + + return 0; +} + /* caps_down - lower all effective caps */ int caps_down(void) { @@ -499,3 +567,23 @@ out: cap_free(caps); return fret; } + +uint64_t get_unique_mnt_id(const char *path) +{ + struct statx sx; + int ret; + + ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx); + if (ret == -1) { + ksft_print_msg("retrieving unique mount ID for %s: %s\n", path, + strerror(errno)); + return 0; + } + + if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) { + ksft_print_msg("no unique mount ID available for %s\n", path); + return 0; + } + + return sx.stx_mnt_id; +} diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h index 7f1df2a3e94c..70f7ccc607f4 100644 --- a/tools/testing/selftests/filesystems/utils.h +++ b/tools/testing/selftests/filesystems/utils.h @@ -27,6 +27,7 @@ extern int caps_down(void); extern int cap_down(cap_value_t down); extern bool switch_ids(uid_t uid, gid_t gid); +extern int setup_userns(void); static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps) { @@ -42,4 +43,6 @@ static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps) return true; } +extern uint64_t get_unique_mnt_id(const char *path); + #endif /* __IDMAP_UTILS_H */ diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/wrappers.h index c38bc48e0cfa..420ae4f908cf 100644 --- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h +++ b/tools/testing/selftests/filesystems/wrappers.h @@ -9,6 +9,10 @@ #include <linux/mount.h> #include <sys/syscall.h> +#ifndef STATX_MNT_ID_UNIQUE +#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ +#endif + static inline int sys_fsopen(const char *fsname, unsigned int flags) { return syscall(__NR_fsopen, fsname, flags); @@ -36,6 +40,28 @@ static inline int sys_mount(const char *src, const char *tgt, const char *fst, #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ #endif +#ifndef MOVE_MOUNT_T_EMPTY_PATH +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#endif + +#ifndef __NR_move_mount + #if defined __alpha__ + #define __NR_move_mount 539 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_move_mount 4429 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_move_mount 6429 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_move_mount 5429 + #endif + #else + #define __NR_move_mount 429 + #endif +#endif + static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, unsigned int flags) @@ -53,7 +79,25 @@ static inline int sys_move_mount(int from_dfd, const char *from_pathname, #endif #ifndef AT_RECURSIVE -#define AT_RECURSIVE 0x8000 +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +#endif + +#ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #else + #define __NR_open_tree 428 + #endif #endif static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index 49d96bb16355..7c12263f8260 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -6,6 +6,6 @@ TEST_PROGS := ftracetest-ktap TEST_FILES := test.d settings EXTRA_CLEAN := $(OUTPUT)/logs/* -TEST_GEN_PROGS = poll +TEST_GEN_FILES := poll include ../lib.mk diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index fbcbdb6963b3..7b24ae89594a 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -1,11 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only +futex_numa_mpol +futex_priv_hash +futex_requeue futex_requeue_pi futex_requeue_pi_mismatched_ops futex_requeue_pi_signal_restart +futex_wait futex_wait_private_mapped_file futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock -futex_wait -futex_requeue futex_waitv diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index f79f9bac7918..8cfb87f7f7c5 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 INCLUDES := -I../include -I../../ $(KHDR_INCLUDES) CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES) -LDLIBS := -lpthread -lrt +LDLIBS := -lpthread -lrt -lnuma LOCAL_HDRS := \ ../include/futextest.h \ @@ -17,7 +17,10 @@ TEST_GEN_PROGS := \ futex_wait_private_mapped_file \ futex_wait \ futex_requeue \ - futex_waitv + futex_priv_hash \ + futex_numa_mpol \ + futex_waitv \ + futex_numa TEST_PROGS := run.sh diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c new file mode 100644 index 000000000000..f29e4d627e79 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_numa.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <pthread.h> +#include <sys/shm.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <stdbool.h> +#include <time.h> +#include <assert.h> +#include "logging.h" +#include "futextest.h" +#include "futex2test.h" + +typedef u_int32_t u32; +typedef int32_t s32; +typedef u_int64_t u64; + +static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE); +static int fnode = FUTEX_NO_NODE; + +/* fairly stupid test-and-set lock with a waiter flag */ + +#define N_LOCK 0x0000001 +#define N_WAITERS 0x0001000 + +struct futex_numa_32 { + union { + u64 full; + struct { + u32 val; + u32 node; + }; + }; +}; + +void futex_numa_32_lock(struct futex_numa_32 *lock) +{ + for (;;) { + struct futex_numa_32 new, old = { + .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED), + }; + + for (;;) { + new = old; + if (old.val == 0) { + /* no waiter, no lock -> first lock, set no-node */ + new.node = fnode; + } + if (old.val & N_LOCK) { + /* contention, set waiter */ + new.val |= N_WAITERS; + } + new.val |= N_LOCK; + + /* nothing changed, ready to block */ + if (old.full == new.full) + break; + + /* + * Use u64 cmpxchg to set the futex value and node in a + * consistent manner. + */ + if (__atomic_compare_exchange_n(&lock->full, + &old.full, new.full, + /* .weak */ false, + __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED)) { + + /* if we just set N_LOCK, we own it */ + if (!(old.val & N_LOCK)) + return; + + /* go block */ + break; + } + } + + futex2_wait(lock, new.val, fflags, NULL, 0); + } +} + +void futex_numa_32_unlock(struct futex_numa_32 *lock) +{ + u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE); + assert((s32)val >= 0); + if (val & N_WAITERS) { + int woken = futex2_wake(lock, 1, fflags); + assert(val == N_WAITERS); + if (!woken) { + __atomic_compare_exchange_n(&lock->val, &val, 0U, + false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); + } + } +} + +static long nanos = 50000; + +struct thread_args { + pthread_t tid; + volatile int * done; + struct futex_numa_32 *lock; + int val; + int *val1, *val2; + int node; +}; + +static void *threadfn(void *_arg) +{ + struct thread_args *args = _arg; + struct timespec ts = { + .tv_nsec = nanos, + }; + int node; + + while (!*args->done) { + + futex_numa_32_lock(args->lock); + args->val++; + + assert(*args->val1 == *args->val2); + (*args->val1)++; + nanosleep(&ts, NULL); + (*args->val2)++; + + node = args->lock->node; + futex_numa_32_unlock(args->lock); + + if (node != args->node) { + args->node = node; + printf("node: %d\n", node); + } + + nanosleep(&ts, NULL); + } + + return NULL; +} + +static void *contendfn(void *_arg) +{ + struct thread_args *args = _arg; + + while (!*args->done) { + /* + * futex2_wait() will take hb-lock, verify *var == val and + * queue/abort. By knowingly setting val 'wrong' this will + * abort and thereby generate hb-lock contention. + */ + futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0); + args->val++; + } + + return NULL; +} + +static volatile int done = 0; +static struct futex_numa_32 lock = { .val = 0, }; +static int val1, val2; + +int main(int argc, char *argv[]) +{ + struct thread_args *tas[512], *cas[512]; + int c, t, threads = 2, contenders = 0; + int sleeps = 10; + int total = 0; + + while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) { + switch (c) { + case 'c': + contenders = atoi(optarg); + break; + case 't': + threads = atoi(optarg); + break; + case 's': + sleeps = atoi(optarg); + break; + case 'n': + nanos = atoi(optarg); + break; + case 'N': + fflags |= FUTEX2_NUMA; + if (optarg) + fnode = atoi(optarg); + break; + default: + exit(1); + break; + } + } + + for (t = 0; t < contenders; t++) { + struct thread_args *args = calloc(1, sizeof(*args)); + if (!args) { + perror("thread_args"); + exit(-1); + } + + args->done = &done; + args->lock = &lock; + args->val1 = &val1; + args->val2 = &val2; + args->node = -1; + + if (pthread_create(&args->tid, NULL, contendfn, args)) { + perror("pthread_create"); + exit(-1); + } + + cas[t] = args; + } + + for (t = 0; t < threads; t++) { + struct thread_args *args = calloc(1, sizeof(*args)); + if (!args) { + perror("thread_args"); + exit(-1); + } + + args->done = &done; + args->lock = &lock; + args->val1 = &val1; + args->val2 = &val2; + args->node = -1; + + if (pthread_create(&args->tid, NULL, threadfn, args)) { + perror("pthread_create"); + exit(-1); + } + + tas[t] = args; + } + + sleep(sleeps); + + done = true; + + for (t = 0; t < threads; t++) { + struct thread_args *args = tas[t]; + + pthread_join(args->tid, NULL); + total += args->val; +// printf("tval: %d\n", args->val); + } + printf("total: %d\n", total); + + if (contenders) { + total = 0; + for (t = 0; t < contenders; t++) { + struct thread_args *args = cas[t]; + + pthread_join(args->tid, NULL); + total += args->val; + // printf("tval: %d\n", args->val); + } + printf("contenders: %d\n", total); + } + + return 0; +} + diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c new file mode 100644 index 000000000000..20a9d3ecf743 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de> + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <numa.h> +#include <numaif.h> + +#include <linux/futex.h> +#include <sys/mman.h> + +#include "logging.h" +#include "futextest.h" +#include "futex2test.h" + +#define MAX_THREADS 64 + +static pthread_barrier_t barrier_main; +static pthread_t threads[MAX_THREADS]; + +struct thread_args { + void *futex_ptr; + unsigned int flags; + int result; +}; + +static struct thread_args thread_args[MAX_THREADS]; + +#ifndef FUTEX_NO_NODE +#define FUTEX_NO_NODE (-1) +#endif + +#ifndef FUTEX2_MPOL +#define FUTEX2_MPOL 0x08 +#endif + +static void *thread_lock_fn(void *arg) +{ + struct thread_args *args = arg; + int ret; + + pthread_barrier_wait(&barrier_main); + ret = futex2_wait(args->futex_ptr, 0, args->flags, NULL, 0); + args->result = ret; + return NULL; +} + +static void create_max_threads(void *futex_ptr) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + thread_args[i].futex_ptr = futex_ptr; + thread_args[i].flags = FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA; + thread_args[i].result = 0; + ret = pthread_create(&threads[i], NULL, thread_lock_fn, &thread_args[i]); + if (ret) + ksft_exit_fail_msg("pthread_create failed\n"); + } +} + +static void join_max_threads(void) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + ret = pthread_join(threads[i], NULL); + if (ret) + ksft_exit_fail_msg("pthread_join failed for thread %d\n", i); + } +} + +static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flags) +{ + int to_wake, ret, i, need_exit = 0; + + pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1); + create_max_threads(futex_ptr); + pthread_barrier_wait(&barrier_main); + to_wake = MAX_THREADS; + + do { + ret = futex2_wake(futex_ptr, to_wake, futex_flags); + if (must_fail) { + if (ret < 0) + break; + ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n", + to_wake, futex_flags); + } + if (ret < 0) { + ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n", + to_wake, futex_flags); + } + if (!ret) + usleep(50); + to_wake -= ret; + + } while (to_wake); + join_max_threads(); + + for (i = 0; i < MAX_THREADS; i++) { + if (must_fail && thread_args[i].result != -1) { + ksft_print_msg("Thread %d should fail but succeeded (%d)\n", + i, thread_args[i].result); + need_exit = 1; + } + if (!must_fail && thread_args[i].result != 0) { + ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result); + need_exit = 1; + } + } + if (need_exit) + ksft_exit_fail_msg("Aborting due to earlier errors.\n"); +} + +static void test_futex(void *futex_ptr, int must_fail) +{ + __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA); +} + +static void test_futex_mpol(void *futex_ptr, int must_fail) +{ + __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); +} + +static void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int main(int argc, char *argv[]) +{ + struct futex32_numa *futex_numa; + int mem_size, i; + void *futex_ptr; + char c; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + ksft_print_header(); + ksft_set_plan(1); + + mem_size = sysconf(_SC_PAGE_SIZE); + futex_ptr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + if (futex_ptr == MAP_FAILED) + ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size); + + futex_numa = futex_ptr; + + ksft_print_msg("Regular test\n"); + futex_numa->futex = 0; + futex_numa->numa = FUTEX_NO_NODE; + test_futex(futex_ptr, 0); + + if (futex_numa->numa == FUTEX_NO_NODE) + ksft_exit_fail_msg("NUMA node is left uninitialized\n"); + + ksft_print_msg("Memory too small\n"); + test_futex(futex_ptr + mem_size - 4, 1); + + ksft_print_msg("Memory out of range\n"); + test_futex(futex_ptr + mem_size, 1); + + futex_numa->numa = FUTEX_NO_NODE; + mprotect(futex_ptr, mem_size, PROT_READ); + ksft_print_msg("Memory, RO\n"); + test_futex(futex_ptr, 1); + + mprotect(futex_ptr, mem_size, PROT_NONE); + ksft_print_msg("Memory, no access\n"); + test_futex(futex_ptr, 1); + + mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE); + ksft_print_msg("Memory back to RW\n"); + test_futex(futex_ptr, 0); + + /* MPOL test. Does not work as expected */ + for (i = 0; i < 4; i++) { + unsigned long nodemask; + int ret; + + nodemask = 1 << i; + ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask, + sizeof(nodemask) * 8, 0); + if (ret == 0) { + ksft_print_msg("Node %d test\n", i); + futex_numa->futex = 0; + futex_numa->numa = FUTEX_NO_NODE; + + ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); + if (ret < 0) + ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n"); + if (0) + test_futex_mpol(futex_numa, 0); + if (futex_numa->numa != i) { + ksft_test_result_fail("Returned NUMA node is %d expected %d\n", + futex_numa->numa, i); + } + } + } + ksft_test_result_pass("NUMA MPOL tests passed\n"); + ksft_finished(); + return 0; +} diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c new file mode 100644 index 000000000000..2dca18fefedc --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de> + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <linux/prctl.h> +#include <sys/prctl.h> + +#include "logging.h" + +#define MAX_THREADS 64 + +static pthread_barrier_t barrier_main; +static pthread_mutex_t global_lock; +static pthread_t threads[MAX_THREADS]; +static int counter; + +#ifndef PR_FUTEX_HASH +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 +#endif + +static int futex_hash_slots_set(unsigned int slots, int flags) +{ + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags); +} + +static int futex_hash_slots_get(void) +{ + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); +} + +static int futex_hash_immutable_get(void) +{ + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE); +} + +static void futex_hash_slots_set_verify(int slots) +{ + int ret; + + ret = futex_hash_slots_set(slots, 0); + if (ret != 0) { + ksft_test_result_fail("Failed to set slots to %d: %m\n", slots); + ksft_finished(); + } + ret = futex_hash_slots_get(); + if (ret != slots) { + ksft_test_result_fail("Set %d slots but PR_FUTEX_HASH_GET_SLOTS returns: %d, %m\n", + slots, ret); + ksft_finished(); + } + ksft_test_result_pass("SET and GET slots %d passed\n", slots); +} + +static void futex_hash_slots_set_must_fail(int slots, int flags) +{ + int ret; + + ret = futex_hash_slots_set(slots, flags); + ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n", + slots, flags); +} + +static void *thread_return_fn(void *arg) +{ + return NULL; +} + +static void *thread_lock_fn(void *arg) +{ + pthread_barrier_wait(&barrier_main); + + pthread_mutex_lock(&global_lock); + counter++; + usleep(20); + pthread_mutex_unlock(&global_lock); + return NULL; +} + +static void create_max_threads(void *(*thread_fn)(void *)) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + ret = pthread_create(&threads[i], NULL, thread_fn, NULL); + if (ret) + ksft_exit_fail_msg("pthread_create failed: %m\n"); + } +} + +static void join_max_threads(void) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + ret = pthread_join(threads[i], NULL); + if (ret) + ksft_exit_fail_msg("pthread_join failed for thread %d\n", i); + } +} + +static void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -g Test global hash instead intead local immutable \n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n"; +static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n"; + +int main(int argc, char *argv[]) +{ + int futex_slots1, futex_slotsn, online_cpus; + pthread_mutexattr_t mutex_attr_pi; + int use_global_hash = 0; + int ret; + char c; + + while ((c = getopt(argc, argv, "cghv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'g': + use_global_hash = 1; + break; + case 'h': + usage(basename(argv[0])); + exit(0); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + ksft_print_header(); + ksft_set_plan(22); + + ret = pthread_mutexattr_init(&mutex_attr_pi); + ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT); + ret |= pthread_mutex_init(&global_lock, &mutex_attr_pi); + if (ret != 0) { + ksft_exit_fail_msg("Failed to initialize pthread mutex.\n"); + } + /* First thread, expect to be 0, not yet initialized */ + ret = futex_hash_slots_get(); + if (ret != 0) + ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret); + + ret = futex_hash_immutable_get(); + if (ret != 0) + ksft_exit_fail_msg("futex_hash_immutable_get() failed: %d, %m\n", ret); + + ksft_test_result_pass("Basic get slots and immutable status.\n"); + ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL); + if (ret != 0) + ksft_exit_fail_msg("pthread_create() failed: %d, %m\n", ret); + + ret = pthread_join(threads[0], NULL); + if (ret != 0) + ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret); + + /* First thread, has to initialiaze private hash */ + futex_slots1 = futex_hash_slots_get(); + if (futex_slots1 <= 0) { + ksft_print_msg("Current hash buckets: %d\n", futex_slots1); + ksft_exit_fail_msg(test_msg_auto_create); + } + + ksft_test_result_pass(test_msg_auto_create); + + online_cpus = sysconf(_SC_NPROCESSORS_ONLN); + ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1); + if (ret != 0) + ksft_exit_fail_msg("pthread_barrier_init failed: %m.\n"); + + ret = pthread_mutex_lock(&global_lock); + if (ret != 0) + ksft_exit_fail_msg("pthread_mutex_lock failed: %m.\n"); + + counter = 0; + create_max_threads(thread_lock_fn); + pthread_barrier_wait(&barrier_main); + + /* + * The current default size of hash buckets is 16. The auto increase + * works only if more than 16 CPUs are available. + */ + ksft_print_msg("Online CPUs: %d\n", online_cpus); + if (online_cpus > 16) { + futex_slotsn = futex_hash_slots_get(); + if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) { + ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n", + futex_slots1, futex_slotsn); + ksft_exit_fail_msg(test_msg_auto_inc); + } + ksft_test_result_pass(test_msg_auto_inc); + } else { + ksft_test_result_skip(test_msg_auto_inc); + } + ret = pthread_mutex_unlock(&global_lock); + + /* Once the user changes it, it has to be what is set */ + futex_hash_slots_set_verify(2); + futex_hash_slots_set_verify(4); + futex_hash_slots_set_verify(8); + futex_hash_slots_set_verify(32); + futex_hash_slots_set_verify(16); + + ret = futex_hash_slots_set(15, 0); + ksft_test_result(ret < 0, "Use 15 slots\n"); + + futex_hash_slots_set_verify(2); + join_max_threads(); + ksft_test_result(counter == MAX_THREADS, "Created of waited for %d of %d threads\n", + counter, MAX_THREADS); + counter = 0; + /* Once the user set something, auto reisze must be disabled */ + ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); + + create_max_threads(thread_lock_fn); + join_max_threads(); + + ret = futex_hash_slots_get(); + ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n", + ret); + + futex_hash_slots_set_must_fail(1 << 29, 0); + + /* + * Once the private hash has been made immutable or global hash has been requested, + * then this requested can not be undone. + */ + if (use_global_hash) { + ret = futex_hash_slots_set(0, 0); + ksft_test_result(ret == 0, "Global hash request\n"); + } else { + ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE); + ksft_test_result(ret == 0, "Immutable resize to 4\n"); + } + if (ret != 0) + goto out; + + futex_hash_slots_set_must_fail(4, 0); + futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(8, 0); + futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE); + + ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); + if (ret != 0) { + ksft_exit_fail_msg("pthread_barrier_init failed: %m\n"); + return 1; + } + create_max_threads(thread_lock_fn); + join_max_threads(); + + ret = futex_hash_slots_get(); + if (use_global_hash) { + ksft_test_result(ret == 0, "Continue to use global hash\n"); + } else { + ksft_test_result(ret == 4, "Continue to use the 4 hash buckets\n"); + } + + ret = futex_hash_immutable_get(); + ksft_test_result(ret == 1, "Hash reports to be immutable\n"); + +out: + ksft_finished(); + return 0; +} diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 5ccd599da6c3..81739849f299 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -82,3 +82,10 @@ echo echo ./futex_waitv $COLOR + +echo +./futex_priv_hash $COLOR +./futex_priv_hash -g $COLOR + +echo +./futex_numa_mpol $COLOR diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index 9d305520e849..ea79662405bc 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -8,6 +8,53 @@ #define u64_to_ptr(x) ((void *)(uintptr_t)(x)) +#ifndef __NR_futex_waitv +#define __NR_futex_waitv 449 +struct futex_waitv { + __u64 val; + __u64 uaddr; + __u32 flags; + __u32 __reserved; +}; +#endif + +#ifndef __NR_futex_wake +#define __NR_futex_wake 454 +#endif + +#ifndef __NR_futex_wait +#define __NR_futex_wait 455 +#endif + +#ifndef FUTEX2_SIZE_U32 +#define FUTEX2_SIZE_U32 0x02 +#endif + +#ifndef FUTEX2_NUMA +#define FUTEX2_NUMA 0x04 +#endif + +#ifndef FUTEX2_MPOL +#define FUTEX2_MPOL 0x08 +#endif + +#ifndef FUTEX2_PRIVATE +#define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG +#endif + +#ifndef FUTEX2_NO_NODE +#define FUTEX_NO_NODE (-1) +#endif + +#ifndef FUTEX_32 +#define FUTEX_32 FUTEX2_SIZE_U32 +#endif + +struct futex32_numa { + futex_t futex; + futex_t numa; +}; + /** * futex_waitv - Wait at multiple futexes, wake on any * @waiters: Array of waiters @@ -20,3 +67,26 @@ static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned lon { return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); } + +/* + * futex_wait() - block on uaddr with optional timeout + * @val: Expected value + * @flags: FUTEX2 flags + * @timeout: Relative timeout + * @clockid: Clock id for the timeout + */ +static inline int futex2_wait(void *uaddr, long val, unsigned int flags, + struct timespec *timeout, clockid_t clockid) +{ + return syscall(__NR_futex_wait, uaddr, val, ~0U, flags, timeout, clockid); +} + +/* + * futex2_wake() - Wake a number of futexes + * @nr: Number of threads to wake at most + * @flags: FUTEX2 flags + */ +static inline int futex2_wake(void *uaddr, int nr, unsigned int flags) +{ + return syscall(__NR_futex_wake, uaddr, ~0U, nr, flags); +} diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile index 67fe7a46cb62..e3000ccb9a5d 100644 --- a/tools/testing/selftests/kexec/Makefile +++ b/tools/testing/selftests/kexec/Makefile @@ -8,6 +8,13 @@ ifeq ($(ARCH_PROCESSED),$(filter $(ARCH_PROCESSED),x86 ppc64le)) TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh TEST_FILES := kexec_common_lib.sh +include ../../../scripts/Makefile.arch + +ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86) +TEST_PROGS += test_kexec_jump.sh +test_kexec_jump.sh: $(OUTPUT)/test_kexec_jump +endif + include ../lib.mk endif diff --git a/tools/testing/selftests/kexec/test_kexec_jump.c b/tools/testing/selftests/kexec/test_kexec_jump.c new file mode 100644 index 000000000000..fbce287866f5 --- /dev/null +++ b/tools/testing/selftests/kexec/test_kexec_jump.c @@ -0,0 +1,72 @@ +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <linux/kexec.h> +#include <linux/reboot.h> +#include <sys/reboot.h> +#include <sys/syscall.h> + +asm( + " .code64\n" + " .data\n" + "purgatory_start:\n" + + // Trigger kexec debug exception handling + " int3\n" + + // Set load address for next time + " leaq purgatory_start_b(%rip), %r11\n" + " movq %r11, 8(%rsp)\n" + + // Back to Linux + " ret\n" + + // Same again + "purgatory_start_b:\n" + + // Trigger kexec debug exception handling + " int3\n" + + // Set load address for next time + " leaq purgatory_start(%rip), %r11\n" + " movq %r11, 8(%rsp)\n" + + // Back to Linux + " ret\n" + + "purgatory_end:\n" + ".previous" +); +extern char purgatory_start[], purgatory_end[]; + +int main (void) +{ + struct kexec_segment segment = {}; + int ret; + + segment.buf = purgatory_start; + segment.bufsz = purgatory_end - purgatory_start; + segment.mem = (void *)0x400000; + segment.memsz = 0x1000; + ret = syscall(__NR_kexec_load, 0x400000, 1, &segment, KEXEC_PRESERVE_CONTEXT); + if (ret) { + perror("kexec_load"); + exit(1); + } + + ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC); + if (ret) { + perror("kexec reboot"); + exit(1); + } + + ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC); + if (ret) { + perror("kexec reboot"); + exit(1); + } + printf("Success\n"); + return 0; +} + diff --git a/tools/testing/selftests/kexec/test_kexec_jump.sh b/tools/testing/selftests/kexec/test_kexec_jump.sh new file mode 100755 index 000000000000..6ae977054ba2 --- /dev/null +++ b/tools/testing/selftests/kexec/test_kexec_jump.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Prevent loading a kernel image via the kexec_load syscall when +# signatures are required. (Dependent on CONFIG_IMA_ARCH_POLICY.) + +TEST="$0" +. ./kexec_common_lib.sh + +# kexec requires root privileges +require_root_privileges + +# get the kernel config +get_kconfig + +kconfig_enabled "CONFIG_KEXEC_JUMP=y" "kexec_jump is enabled" +if [ $? -eq 0 ]; then + log_skip "kexec_jump is not enabled" +fi + +kconfig_enabled "CONFIG_IMA_APPRAISE=y" "IMA enabled" +ima_appraise=$? + +kconfig_enabled "CONFIG_IMA_ARCH_POLICY=y" \ + "IMA architecture specific policy enabled" +arch_policy=$? + +get_secureboot_mode +secureboot=$? + +if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ]; then + log_skip "Secure boot and CONFIG_IMA_ARCH_POLICY are enabled" +fi + +./test_kexec_jump +if [ $? -eq 0 ]; then + log_pass "kexec_jump succeeded" +else + # The more likely failure mode if anything went wrong is that the + # kernel just crashes. But if we get back here, sure, whine anyway. + log_fail "kexec_jump failed" +fi diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile index 0c0d7b1234c1..4d4f810cdf2c 100644 --- a/tools/testing/selftests/mount_setattr/Makefile +++ b/tools/testing/selftests/mount_setattr/Makefile @@ -2,6 +2,8 @@ # Makefile for mount selftests. CFLAGS = -g $(KHDR_INCLUDES) -Wall -O2 -pthread +LOCAL_HDRS += ../filesystems/wrappers.h + TEST_GEN_PROGS := mount_setattr_test include ../lib.mk diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index 48a000cabc97..8b378c91debf 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -20,7 +20,7 @@ #include <stdarg.h> #include <linux/mount.h> -#include "../filesystems/overlayfs/wrappers.h" +#include "../filesystems/wrappers.h" #include "../kselftest_harness.h" #ifndef CLONE_NEWNS @@ -107,46 +107,6 @@ #endif #endif -#ifndef __NR_open_tree - #if defined __alpha__ - #define __NR_open_tree 538 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_open_tree 4428 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_open_tree 6428 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_open_tree 5428 - #endif - #elif defined __ia64__ - #define __NR_open_tree (428 + 1024) - #else - #define __NR_open_tree 428 - #endif -#endif - -#ifndef __NR_move_mount - #if defined __alpha__ - #define __NR_move_mount 539 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_move_mount 4429 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_move_mount 6429 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_move_mount 5429 - #endif - #elif defined __ia64__ - #define __NR_move_mount (428 + 1024) - #else - #define __NR_move_mount 429 - #endif -#endif - #ifndef MOUNT_ATTR_IDMAP #define MOUNT_ATTR_IDMAP 0x00100000 #endif @@ -161,23 +121,6 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); } -#ifndef OPEN_TREE_CLONE -#define OPEN_TREE_CLONE 1 -#endif - -#ifndef OPEN_TREE_CLOEXEC -#define OPEN_TREE_CLOEXEC O_CLOEXEC -#endif - -#ifndef AT_RECURSIVE -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ -#endif - -static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) -{ - return syscall(__NR_open_tree, dfd, filename, flags); -} - static ssize_t write_nointr(int fd, const void *buf, size_t count) { ssize_t ret; @@ -1076,7 +1019,7 @@ FIXTURE_SETUP(mount_setattr_idmapped) ASSERT_EQ(mkdir("/mnt/D", 0777), 0); img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600); ASSERT_GE(img_fd, 0); - ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0); + ASSERT_EQ(ftruncate(img_fd, 2147483648 /* 2 GB */), 0); ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0); ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0); ASSERT_EQ(close(img_fd), 0); diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c index 49dc1e831174..e03fe1b9bba2 100644 --- a/tools/testing/selftests/perf_events/watermark_signal.c +++ b/tools/testing/selftests/perf_events/watermark_signal.c @@ -75,7 +75,7 @@ TEST(watermark_signal) if (waitpid(child, &child_status, WSTOPPED) != child || !(WIFSTOPPED(child_status) && WSTOPSIG(child_status) == SIGSTOP)) { fprintf(stderr, - "failed to sycnhronize with child errno=%d status=%x\n", + "failed to synchronize with child errno=%d status=%x\n", errno, child_status); goto cleanup; diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c index 51c414faabb0..96f274f0582b 100644 --- a/tools/testing/selftests/pid_namespace/pid_max.c +++ b/tools/testing/selftests/pid_namespace/pid_max.c @@ -10,6 +10,7 @@ #include <stdlib.h> #include <string.h> #include <syscall.h> +#include <sys/mount.h> #include <sys/wait.h> #include "../kselftest_harness.h" diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 55bcf81a2b9a..efd74063126e 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -131,6 +131,26 @@ #define PIDFD_INFO_EXIT (1UL << 3) /* Always returned if available, even if not requested */ #endif +#ifndef PIDFD_INFO_COREDUMP +#define PIDFD_INFO_COREDUMP (1UL << 4) +#endif + +#ifndef PIDFD_COREDUMPED +#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */ +#endif + +#ifndef PIDFD_COREDUMP_SKIP +#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */ +#endif + +#ifndef PIDFD_COREDUMP_USER +#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */ +#endif + +#ifndef PIDFD_COREDUMP_ROOT +#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */ +#endif + #ifndef PIDFD_THREAD #define PIDFD_THREAD O_EXCL #endif @@ -150,6 +170,8 @@ struct pidfd_info { __u32 fsuid; __u32 fsgid; __s32 exit_code; + __u32 coredump_mask; + __u32 __spare1; }; /* diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c index 7822dd080258..c094aeb1c620 100644 --- a/tools/testing/selftests/pidfd/pidfd_bind_mount.c +++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c @@ -15,79 +15,7 @@ #include "pidfd.h" #include "../kselftest_harness.h" - -#ifndef __NR_open_tree - #if defined __alpha__ - #define __NR_open_tree 538 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_open_tree 4428 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_open_tree 6428 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_open_tree 5428 - #endif - #elif defined __ia64__ - #define __NR_open_tree (428 + 1024) - #else - #define __NR_open_tree 428 - #endif -#endif - -#ifndef __NR_move_mount - #if defined __alpha__ - #define __NR_move_mount 539 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_move_mount 4429 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_move_mount 6429 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_move_mount 5429 - #endif - #elif defined __ia64__ - #define __NR_move_mount (428 + 1024) - #else - #define __NR_move_mount 429 - #endif -#endif - -#ifndef MOVE_MOUNT_F_EMPTY_PATH -#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ -#endif - -#ifndef MOVE_MOUNT_F_EMPTY_PATH -#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ -#endif - -static inline int sys_move_mount(int from_dfd, const char *from_pathname, - int to_dfd, const char *to_pathname, - unsigned int flags) -{ - return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, - to_pathname, flags); -} - -#ifndef OPEN_TREE_CLONE -#define OPEN_TREE_CLONE 1 -#endif - -#ifndef OPEN_TREE_CLOEXEC -#define OPEN_TREE_CLOEXEC O_CLOEXEC -#endif - -#ifndef AT_RECURSIVE -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ -#endif - -static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) -{ - return syscall(__NR_open_tree, dfd, filename, flags); -} +#include "../filesystems/wrappers.h" FIXTURE(pidfd_bind_mount) { char template[PATH_MAX]; diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c index 1758a1b0457b..a0eb6e81eaa2 100644 --- a/tools/testing/selftests/pidfd/pidfd_info_test.c +++ b/tools/testing/selftests/pidfd/pidfd_info_test.c @@ -299,6 +299,7 @@ TEST_F(pidfd_info, thread_group) /* Opening a thread as a thread-group leader must fail. */ pidfd_thread = sys_pidfd_open(pid_thread, 0); ASSERT_LT(pidfd_thread, 0); + ASSERT_EQ(errno, ENOENT); /* Opening a thread as a PIDFD_THREAD must succeed. */ pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD); @@ -362,9 +363,9 @@ TEST_F(pidfd_info, thread_group) ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0); ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); - /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */ - ASSERT_TRUE(WIFEXITED(info.exit_code)); - ASSERT_EQ(WEXITSTATUS(info.exit_code), 0); + /* Even though the thread-group exited successfully it will still report the group exit code. */ + ASSERT_TRUE(WIFSIGNALED(info.exit_code)); + ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL); /* * Retrieve exit information for the thread-group leader via the @@ -375,9 +376,9 @@ TEST_F(pidfd_info, thread_group) ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_CREDS)); ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_EXIT)); - /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */ - ASSERT_TRUE(WIFEXITED(info2.exit_code)); - ASSERT_EQ(WEXITSTATUS(info2.exit_code), 0); + /* Even though the thread-group exited successfully it will still report the group exit code. */ + ASSERT_TRUE(WIFSIGNALED(info2.exit_code)); + ASSERT_EQ(WTERMSIG(info2.exit_code), SIGKILL); /* Retrieve exit information for the thread. */ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh index aad51e7c0183..991fb11306eb 100755 --- a/tools/testing/selftests/rcutorture/bin/console-badness.sh +++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh @@ -10,7 +10,7 @@ # # Authors: Paul E. McKenney <paulmck@kernel.org> -grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | +grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Call trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | grep -v 'ODEBUG: ' | grep -v 'This means that this is a DEBUG kernel and it is' | grep -v 'Warning: unable to open an initial console' | diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index ad79784e552d..957800c9ffba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -73,7 +73,7 @@ config_override_param "$config_dir/CFcommon.$(uname -m)" KcList \ cp $T/KcList $resdir/ConfigFragment base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` -if test "$base_resdir" != "$resdir" && test -f $base_resdir/bzImage && test -f $base_resdir/vmlinux +if test "$base_resdir" != "$resdir" && (test -f $base_resdir/bzImage || test -f $base_resdir/Image) && test -f $base_resdir/vmlinux then # Rerunning previous test, so use that test's kernel. QEMU="`identify_qemu $base_resdir/vmlinux`" diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index b07c11cf6929..21e6ba3615f6 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -148,7 +148,7 @@ then summary="$summary KCSAN: $n_kcsan" fi fi - n_calltrace=`grep -c 'Call Trace:' $file` + n_calltrace=`grep -Ec 'Call Trace:|Call trace:' $file` if test "$n_calltrace" -ne 0 then summary="$summary Call Traces: $n_calltrace" diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh index 2db12c5cad9c..208be7d09a61 100755 --- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh +++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh @@ -39,8 +39,9 @@ do shift done -err= nerrs=0 + +# Test lockdep's handling of deadlocks. for d in 0 1 do for t in 0 1 2 @@ -52,6 +53,12 @@ do tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 ret=$? mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val" + if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config + then + echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario" + nerrs=$((nerrs+1)) + err=1 + fi if test "$d" -ne 0 && test "$ret" -eq 0 then err=1 @@ -71,6 +78,39 @@ do done done done + +# Test lockdep-enabled testing of mixed SRCU readers. +for val in 0x1 0xf +do + err= + tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.reader_flavor=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 + ret=$? + mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val" + if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config + then + echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario" + nerrs=$((nerrs+1)) + err=1 + fi + if test "$val" -eq 0xf && test "$ret" -eq 0 + then + err=1 + echo -n Unexpected success for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + fi + if test "$val" -eq 0x1 && test "$ret" -ne 0 + then + err=1 + echo -n Unexpected failure for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + fi + if test -n "$err" + then + grep "rcu_torture_init_srcu_lockdep: test_srcu_lockdep = " "$RCUTORTURE/res/$ds/$val/SRCU-P/console.log" | sed -e 's/^.*rcu_torture_init_srcu_lockdep://' >> "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + cat "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + nerrs=$((nerrs+1)) + fi +done + +# Set up exit code. if test "$nerrs" -ne 0 then exit 1 diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 0447c4a00cc4..e03fdaca89b3 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -51,12 +51,15 @@ do_scftorture=yes do_rcuscale=yes do_refscale=yes do_kvfree=yes +do_normal=yes +explicit_normal=no do_kasan=yes do_kcsan=no do_clocksourcewd=yes do_rt=yes do_rcutasksflavors=yes do_srcu_lockdep=yes +do_rcu_rust=no # doyesno - Helper function for yes/no arguments function doyesno () { @@ -87,6 +90,7 @@ usage () { echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture" echo " --do-refscale / --do-no-refscale / --no-refscale" echo " --do-rt / --do-no-rt / --no-rt" + echo " --do-rcu-rust / --do-no-rcu-rust / --no-rcu-rust" echo " --do-scftorture / --do-no-scftorture / --no-scftorture" echo " --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep" echo " --duration [ <minutes> | <hours>h | <days>d ]" @@ -128,6 +132,8 @@ do do_refscale=yes do_rt=yes do_kvfree=yes + do_normal=yes + explicit_normal=no do_kasan=yes do_kcsan=yes do_clocksourcewd=yes @@ -161,11 +167,17 @@ do do_refscale=no do_rt=no do_kvfree=no + do_normal=no + explicit_normal=no do_kasan=no do_kcsan=no do_clocksourcewd=no do_srcu_lockdep=no ;; + --do-normal|--do-no-normal|--no-normal) + do_normal=`doyesno "$1" --do-normal` + explicit_normal=yes + ;; --do-rcuscale|--do-no-rcuscale|--no-rcuscale) do_rcuscale=`doyesno "$1" --do-rcuscale` ;; @@ -181,6 +193,9 @@ do --do-rt|--do-no-rt|--no-rt) do_rt=`doyesno "$1" --do-rt` ;; + --do-rcu-rust|--do-no-rcu-rust|--no-rcu-rust) + do_rcu_rust=`doyesno "$1" --do-rcu-rust` + ;; --do-scftorture|--do-no-scftorture|--no-scftorture) do_scftorture=`doyesno "$1" --do-scftorture` ;; @@ -242,6 +257,17 @@ trap 'rm -rf $T' 0 2 echo " --- " $scriptname $args | tee -a $T/log echo " --- Results directory: " $ds | tee -a $T/log +if test "$do_normal" = "no" && test "$do_kasan" = "no" && test "$do_kcsan" = "no" +then + # Match old scripts so that "--do-none --do-rcutorture" does + # normal rcutorture testing, but no KASAN or KCSAN testing. + if test $explicit_normal = yes + then + echo " --- Everything disabled, so explicit --do-normal overridden" | tee -a $T/log + fi + do_normal=yes +fi + # Calculate rcutorture defaults and apportion time if test -z "$configs_rcutorture" then @@ -332,9 +358,12 @@ function torture_set { local kcsan_kmake_tag= local flavor=$1 shift - curflavor=$flavor - torture_one "$@" - mv $T/last-resdir $T/last-resdir-nodebug || : + if test "$do_normal" = "yes" + then + curflavor=$flavor + torture_one "$@" + mv $T/last-resdir $T/last-resdir-nodebug || : + fi if test "$do_kasan" = "yes" then curflavor=${flavor}-kasan @@ -448,13 +477,57 @@ fi if test "$do_rt" = "yes" then - # With all post-boot grace periods forced to normal. - torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1" - torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make + # In both runs, disable testing of RCU priority boosting because + # -rt doesn't like its interaction with testing of callback + # flooding. + + # With all post-boot grace periods forced to normal (default for PREEMPT_RT). + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcutorture.preempt_duration=0" + torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_RCU_NOCB_CPU=y" --trust-make # With all post-boot grace periods forced to expedited. - torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1" - torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcupdate.rcu_normal_after_boot=0 rcupdate.rcu_expedited=1 rcutorture.preempt_duration=0" + torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_FULL=y CONFIG_RCU_NOCB_CPU=y" --trust-make +fi + +if test "$do_rcu_rust" = "yes" +then + echo " --- do-rcu-rust:" Start `date` | tee -a $T/log + rrdir="tools/testing/selftests/rcutorture/res/$ds/results-rcu-rust" + mkdir -p "$rrdir" + echo " --- make LLVM=1 rustavailable " | tee -a $rrdir/log > $rrdir/rustavailable.out + make LLVM=1 rustavailable > $T/rustavailable.out 2>&1 + retcode=$? + echo $retcode > $rrdir/rustavailable.exitcode + cat $T/rustavailable.out | tee -a $rrdir/log >> $rrdir/rustavailable.out 2>&1 + buildphase=rustavailable + if test "$retcode" -eq 0 + then + echo " --- Running 'make mrproper' in order to run kunit." | tee -a $rrdir/log > $rrdir/mrproper.out + make mrproper > $rrdir/mrproper.out 2>&1 + retcode=$? + echo $retcode > $rrdir/mrproper.exitcode + buildphase=mrproper + fi + if test "$retcode" -eq 0 + then + echo " --- Running rust_doctests_kernel." | tee -a $rrdir/log > $rrdir/rust_doctests_kernel.out + ./tools/testing/kunit/kunit.py run --make_options LLVM=1 --make_options CLIPPY=1 --arch arm64 --kconfig_add CONFIG_SMP=y --kconfig_add CONFIG_WERROR=y --kconfig_add CONFIG_RUST=y rust_doctests_kernel >> $rrdir/rust_doctests_kernel.out 2>&1 + # @@@ Remove "--arch arm64" in order to test on native architecture? + # @@@ Analyze $rrdir/rust_doctests_kernel.out contents? + retcode=$? + echo $retcode > $rrdir/rust_doctests_kernel.exitcode + buildphase=rust_doctests_kernel + fi + if test "$retcode" -eq 0 + then + echo "rcu-rust($retcode)" $rrdir >> $T/successes + echo Success >> $rrdir/log + else + echo "rcu-rust($retcode)" $rrdir >> $T/failures + echo " --- rcu-rust Test summary:" >> $rrdir/log + echo " --- Summary: Exit code $retcode from $buildphase, see $rrdir/$buildphase.out" >> $rrdir/log + fi fi if test "$do_srcu_lockdep" = "yes" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index 8ae41d5f81a3..54b1600c7eb5 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -8,8 +8,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y -CONFIG_MAXSMP=y -CONFIG_CPUMASK_OFFSTACK=y CONFIG_RCU_NOCB_CPU=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot index 40af3df0f397..1cc5b47dde28 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot @@ -1,4 +1,4 @@ -maxcpus=8 nr_cpus=43 +maxcpus=8 nr_cpus=17 rcutree.gp_preinit_delay=3 rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index 50e03eefe7ac..0443beacf362 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -3,7 +3,14 @@ # # Run installed kselftest tests. # -BASE_DIR=$(realpath $(dirname $0)) + +# Fallback to readlink if realpath is not available +if which realpath > /dev/null; then + BASE_DIR=$(realpath $(dirname $0)) +else + BASE_DIR=$(readlink -f $(dirname $0)) +fi + cd $BASE_DIR TESTS="$BASE_DIR"/kselftest-list.txt if [ ! -r "$TESTS" ] ; then diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index a951c0d33cd2..ddc97ecd8b39 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -573,5 +573,32 @@ "teardown": [ "$TC qdisc del dev $DEV1 handle 1: root" ] + }, + { + "id": "831d", + "name": "Test HFSC qlen accounting with DRR/NETEM/BLACKHOLE chain", + "category": ["qdisc", "hfsc", "drr", "netem", "blackhole"], + "plugins": { "requires": ["nsPlugin", "scapyPlugin"] }, + "setup": [ + "$IP link set dev $DEV1 up || true", + "$TC qdisc add dev $DEV1 root handle 1: drr", + "$TC filter add dev $DEV1 parent 1: basic classid 1:1", + "$TC class add dev $DEV1 parent 1: classid 1:1 drr", + "$TC qdisc add dev $DEV1 parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DEV1 parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DEV1 parent 2:1 handle 3: netem", + "$TC qdisc add dev $DEV1 parent 3:1 handle 4: blackhole" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1", + "matchPattern": "qdisc hfsc", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 root handle 1: drr"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json index e9469ee71e6f..6d515d0e5ed6 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json @@ -189,5 +189,29 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "deb1", + "name": "CODEL test qdisc limit trimming", + "category": ["qdisc", "codel"], + "plugins": { + "requires": ["nsPlugin", "scapyPlugin"] + }, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root codel limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root codel limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1p target 5ms interval 100ms", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json index 3a537b2ec4c9..24faf4e12dfa 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json @@ -377,5 +377,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "9479", + "name": "FQ test qdisc limit trimming", + "category": ["qdisc", "fq"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json index 9774b1e8801b..4ce62b857fd7 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json @@ -294,5 +294,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "0436", + "name": "FQ_CODEL test qdisc limit trimming", + "category": ["qdisc", "fq_codel"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq_codel limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_codel limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json index d012d88d67fe..229fe1bf4a90 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json @@ -18,5 +18,27 @@ "matchCount": "1", "teardown": [ ] + }, + { + "id": "83bf", + "name": "FQ_PIE test qdisc limit trimming", + "category": ["qdisc", "fq_pie"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq_pie limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_pie limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq_pie 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json index dbef5474b26b..0ca19fac54a5 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json @@ -188,5 +188,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "385f", + "name": "HHF test qdisc limit trimming", + "category": ["qdisc", "hhf"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root hhf limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root hhf limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json new file mode 100644 index 000000000000..1a98b66e8030 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json @@ -0,0 +1,24 @@ +[ + { + "id": "6158", + "name": "PIE test qdisc limit trimming", + "category": ["qdisc", "pie"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root pie limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root pie limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc pie 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] + } +] diff --git a/tools/testing/selftests/timens/clock_nanosleep.c b/tools/testing/selftests/timens/clock_nanosleep.c index 72d41b955fb2..5cc0010e85ff 100644 --- a/tools/testing/selftests/timens/clock_nanosleep.c +++ b/tools/testing/selftests/timens/clock_nanosleep.c @@ -38,7 +38,7 @@ void *call_nanosleep(void *_args) return NULL; } -int run_test(int clockid, int abs) +static int run_test(int clockid, int abs) { struct timespec now = {}, rem; struct thread_args args = { .now = &now, .rem = &rem, .clockid = clockid}; @@ -115,6 +115,8 @@ int main(int argc, char *argv[]) { int ret, nsfd; + ksft_print_header(); + nscheck(); ksft_set_plan(4); diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c index d12ff955de0d..a644162d56fd 100644 --- a/tools/testing/selftests/timens/exec.c +++ b/tools/testing/selftests/timens/exec.c @@ -36,6 +36,8 @@ int main(int argc, char *argv[]) return 0; } + ksft_print_header(); + nscheck(); ksft_set_plan(1); diff --git a/tools/testing/selftests/timens/futex.c b/tools/testing/selftests/timens/futex.c index 6b2b9264e851..339633ae037a 100644 --- a/tools/testing/selftests/timens/futex.c +++ b/tools/testing/selftests/timens/futex.c @@ -66,6 +66,8 @@ int main(int argc, char *argv[]) pid_t pid; struct timespec mtime_now; + ksft_print_header(); + nscheck(); ksft_set_plan(2); diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c index 6b13dc277724..d6658b7b7548 100644 --- a/tools/testing/selftests/timens/gettime_perf.c +++ b/tools/testing/selftests/timens/gettime_perf.c @@ -67,6 +67,8 @@ int main(int argc, char *argv[]) time_t offset = 10; int nsfd; + ksft_print_header(); + ksft_set_plan(8); fill_function_pointers(); diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c index 1833ca97eb24..0a9ff90ee69a 100644 --- a/tools/testing/selftests/timens/procfs.c +++ b/tools/testing/selftests/timens/procfs.c @@ -180,6 +180,8 @@ int main(int argc, char *argv[]) { int ret = 0; + ksft_print_header(); + nscheck(); ksft_set_plan(2); diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c index 387220791a05..a9c0534ef8f6 100644 --- a/tools/testing/selftests/timens/timens.c +++ b/tools/testing/selftests/timens/timens.c @@ -151,6 +151,8 @@ int main(int argc, char *argv[]) time_t offset; int ret = 0; + ksft_print_header(); + nscheck(); check_supported_timers(); diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c index 5b939f59dfa4..79543ceb2c0f 100644 --- a/tools/testing/selftests/timens/timer.c +++ b/tools/testing/selftests/timens/timer.c @@ -15,7 +15,7 @@ #include "log.h" #include "timens.h" -int run_test(int clockid, struct timespec now) +static int run_test(int clockid, struct timespec now) { struct itimerspec new_value; long long elapsed; @@ -75,6 +75,8 @@ int main(int argc, char *argv[]) pid_t pid; struct timespec btime_now, mtime_now; + ksft_print_header(); + nscheck(); check_supported_timers(); diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c index a4196bbd6e33..402e2e415545 100644 --- a/tools/testing/selftests/timens/timerfd.c +++ b/tools/testing/selftests/timens/timerfd.c @@ -15,14 +15,14 @@ #include "log.h" #include "timens.h" -static int tclock_gettime(clock_t clockid, struct timespec *now) +static int tclock_gettime(clockid_t clockid, struct timespec *now) { if (clockid == CLOCK_BOOTTIME_ALARM) clockid = CLOCK_BOOTTIME; return clock_gettime(clockid, now); } -int run_test(int clockid, struct timespec now) +static int run_test(int clockid, struct timespec now) { struct itimerspec new_value; long long elapsed; @@ -82,6 +82,8 @@ int main(int argc, char *argv[]) pid_t pid; struct timespec btime_now, mtime_now; + ksft_print_header(); + nscheck(); check_supported_timers(); diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c index 5b8907bf451d..b957e1a65124 100644 --- a/tools/testing/selftests/timens/vfork_exec.c +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -91,6 +91,8 @@ int main(int argc, char *argv[]) return check("child after exec", &now); } + ksft_print_header(); + nscheck(); ksft_set_plan(4); diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index f34ac0bac696..4dde8838261d 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir) +CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)/usr/include +ifneq ($(WERROR),0) + CFLAGS += -Werror +endif + LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh @@ -11,6 +15,11 @@ TEST_PROGS += test_generic_05.sh TEST_PROGS += test_generic_06.sh TEST_PROGS += test_generic_07.sh +TEST_PROGS += test_generic_08.sh +TEST_PROGS += test_generic_09.sh +TEST_PROGS += test_generic_10.sh +TEST_PROGS += test_generic_11.sh + TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh TEST_PROGS += test_loop_01.sh diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c index 94a8e729ba4c..5421774d7867 100644 --- a/tools/testing/selftests/ublk/fault_inject.c +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -16,6 +16,11 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; unsigned long dev_size = 250UL << 30; + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + dev->tgt.dev_size = dev_size; dev->tgt.params = (struct ublk_params) { .types = UBLK_PARAM_TYPE_BASIC, diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index 6f34eabfae97..509842df9bee 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -29,19 +29,23 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) { unsigned ublk_op = ublksrv_get_op(iod); - int zc = ublk_queue_use_zc(q); - enum io_uring_op op = ublk_to_uring_op(iod, zc); + unsigned zc = ublk_queue_use_zc(q); + unsigned auto_zc = ublk_queue_use_auto_zc(q); + enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc); struct io_uring_sqe *sqe[3]; + void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr; - if (!zc) { + if (!zc || auto_zc) { ublk_queue_alloc_sqes(q, sqe, 1); if (!sqe[0]) return -ENOMEM; io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/, - (void *)iod->addr, + addr, iod->nr_sectors << 9, iod->start_sector << 9); + if (auto_zc) + sqe[0]->buf_index = tag; io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); /* bit63 marks us as tgt io */ sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1); @@ -145,6 +149,11 @@ static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) }, }; + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + ret = backing_file_tgt_init(dev); if (ret) return ret; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 842b40736a9b..b5131a000795 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -216,6 +216,30 @@ static int ublk_ctrl_get_features(struct ublk_dev *dev, return __ublk_ctrl_cmd(dev, &data); } +static int ublk_ctrl_update_size(struct ublk_dev *dev, + __u64 nr_sects) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_UPDATE_SIZE, + .flags = CTRL_CMD_HAS_DATA, + }; + + data.data[0] = nr_sects; + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev, + unsigned int timeout_ms) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_QUIESCE_DEV, + .flags = CTRL_CMD_HAS_DATA, + }; + + data.data[0] = timeout_ms; + return __ublk_ctrl_cmd(dev, &data); +} + static const char *ublk_dev_state_desc(struct ublk_dev *dev) { switch (dev->dev_info.state) { @@ -405,7 +429,7 @@ static void ublk_queue_deinit(struct ublk_queue *q) free(q->ios[i].buf_addr); } -static int ublk_queue_init(struct ublk_queue *q) +static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) { struct ublk_dev *dev = q->dev; int depth = dev->dev_info.queue_depth; @@ -420,10 +444,14 @@ static int ublk_queue_init(struct ublk_queue *q) q->cmd_inflight = 0; q->tid = gettid(); - if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) { + if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) { q->state |= UBLKSRV_NO_BUF; - q->state |= UBLKSRV_ZC; + if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) + q->state |= UBLKSRV_ZC; + if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG) + q->state |= UBLKSRV_AUTO_BUF_REG; } + q->state |= extra_flags; cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); @@ -461,7 +489,7 @@ static int ublk_queue_init(struct ublk_queue *q) goto fail; } - if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) { + if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) { ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth); if (ret) { ublk_err("ublk dev %d queue %d register spare buffers failed %d", @@ -525,6 +553,23 @@ static void ublk_dev_unprep(struct ublk_dev *dev) close(dev->fds[0]); } +static void ublk_set_auto_buf_reg(const struct ublk_queue *q, + struct io_uring_sqe *sqe, + unsigned short tag) +{ + struct ublk_auto_buf_reg buf = {}; + + if (q->tgt_ops->buf_index) + buf.index = q->tgt_ops->buf_index(q, tag); + else + buf.index = tag; + + if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + buf.flags = UBLK_AUTO_BUF_REG_FALLBACK; + + sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf); +} + int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) { struct ublksrv_io_cmd *cmd; @@ -579,6 +624,9 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) else cmd->addr = 0; + if (q->state & UBLKSRV_AUTO_BUF_REG) + ublk_set_auto_buf_reg(q, sqe[0], tag); + user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0); io_uring_sqe_set_data64(sqe[0], user_data); @@ -729,6 +777,7 @@ struct ublk_queue_info { struct ublk_queue *q; sem_t *queue_sem; cpu_set_t *affinity; + unsigned char auto_zc_fallback; }; static void *ublk_io_handler_fn(void *data) @@ -736,9 +785,13 @@ static void *ublk_io_handler_fn(void *data) struct ublk_queue_info *info = data; struct ublk_queue *q = info->q; int dev_id = q->dev->dev_info.dev_id; + unsigned extra_flags = 0; int ret; - ret = ublk_queue_init(q); + if (info->auto_zc_fallback) + extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK; + + ret = ublk_queue_init(q, extra_flags); if (ret) { ublk_err("ublk dev %d queue %d init queue failed\n", dev_id, q->q_id); @@ -831,6 +884,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) qinfo[i].q = &dev->q[i]; qinfo[i].queue_sem = &queue_sem; qinfo[i].affinity = &affinity_buf[i]; + qinfo[i].auto_zc_fallback = ctx->auto_zc_fallback; pthread_create(&dev->q[i].thread, NULL, ublk_io_handler_fn, &qinfo[i]); @@ -1011,6 +1065,9 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) info->nr_hw_queues = nr_queues; info->queue_depth = depth; info->flags = ctx->flags; + if ((features & UBLK_F_QUIESCE) && + (info->flags & UBLK_F_USER_RECOVERY)) + info->flags |= UBLK_F_QUIESCE; dev->tgt.ops = ops; dev->tgt.sq_depth = depth; dev->tgt.cq_depth = depth; @@ -1206,6 +1263,9 @@ static int cmd_dev_get_features(void) [const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY", [const_ilog2(UBLK_F_ZONED)] = "ZONED", [const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO", + [const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE", + [const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG", + [const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE", }; struct ublk_dev *dev; __u64 features = 0; @@ -1239,13 +1299,66 @@ static int cmd_dev_get_features(void) return ret; } +static int cmd_dev_update_size(struct dev_ctx *ctx) +{ + struct ublk_dev *dev = ublk_ctrl_init(); + struct ublk_params p; + int ret = -EINVAL; + + if (!dev) + return -ENODEV; + + if (ctx->dev_id < 0) { + fprintf(stderr, "device id isn't provided\n"); + goto out; + } + + dev->dev_info.dev_id = ctx->dev_id; + ret = ublk_ctrl_get_params(dev, &p); + if (ret < 0) { + ublk_err("failed to get params %d %s\n", ret, strerror(-ret)); + goto out; + } + + if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) { + ublk_err("size isn't aligned with logical block size\n"); + ret = -EINVAL; + goto out; + } + + ret = ublk_ctrl_update_size(dev, ctx->size >> 9); +out: + ublk_ctrl_deinit(dev); + return ret; +} + +static int cmd_dev_quiesce(struct dev_ctx *ctx) +{ + struct ublk_dev *dev = ublk_ctrl_init(); + int ret = -EINVAL; + + if (!dev) + return -ENODEV; + + if (ctx->dev_id < 0) { + fprintf(stderr, "device id isn't provided for quiesce\n"); + goto out; + } + dev->dev_info.dev_id = ctx->dev_id; + ret = ublk_ctrl_quiesce_dev(dev, 10000); + +out: + ublk_ctrl_deinit(dev); + return ret; +} + static void __cmd_create_help(char *exe, bool recovery) { int i; printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); - printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n"); + printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n"); printf("\t[-e 0|1 ] [-i 0|1]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1281,6 +1394,8 @@ static int cmd_dev_help(char *exe) printf("%s list [-n dev_id] -a \n", exe); printf("\t -a list all devices, -n list specified device, default -a \n\n"); printf("%s features\n", exe); + printf("%s update_size -n dev_id -s|--size size_in_bytes \n", exe); + printf("%s quiesce -n dev_id\n", exe); return 0; } @@ -1300,6 +1415,9 @@ int main(int argc, char *argv[]) { "recovery_fail_io", 1, NULL, 'e'}, { "recovery_reissue", 1, NULL, 'i'}, { "get_data", 1, NULL, 'g'}, + { "auto_zc", 0, NULL, 0 }, + { "auto_zc_fallback", 0, NULL, 0 }, + { "size", 1, NULL, 's'}, { 0, 0, 0, 0 } }; const struct ublk_tgt_ops *ops = NULL; @@ -1321,7 +1439,7 @@ int main(int argc, char *argv[]) opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1361,6 +1479,9 @@ int main(int argc, char *argv[]) case 'g': ctx.flags |= UBLK_F_NEED_GET_DATA; break; + case 's': + ctx.size = strtoull(optarg, NULL, 10); + break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) ublk_dbg_mask = strtol(optarg, NULL, 16); @@ -1368,6 +1489,10 @@ int main(int argc, char *argv[]) ublk_dbg_mask = 0; if (!strcmp(longopts[option_idx].name, "foreground")) ctx.fg = 1; + if (!strcmp(longopts[option_idx].name, "auto_zc")) + ctx.flags |= UBLK_F_AUTO_BUF_REG; + if (!strcmp(longopts[option_idx].name, "auto_zc_fallback")) + ctx.auto_zc_fallback = 1; break; case '?': /* @@ -1391,6 +1516,16 @@ int main(int argc, char *argv[]) } } + /* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */ + if (ctx.auto_zc_fallback && + !((ctx.flags & UBLK_F_AUTO_BUF_REG) && + (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) { + ublk_err("%s: auto_zc_fallback is set but neither " + "F_AUTO_BUF_REG nor F_SUPPORT_ZERO_COPY is enabled\n", + __func__); + return -EINVAL; + } + i = optind; while (i < argc && ctx.nr_files < MAX_BACK_FILES) { ctx.files[ctx.nr_files++] = argv[i++]; @@ -1423,6 +1558,10 @@ int main(int argc, char *argv[]) ret = cmd_dev_help(argv[0]); else if (!strcmp(cmd, "features")) ret = cmd_dev_get_features(); + else if (!strcmp(cmd, "update_size")) + ret = cmd_dev_update_size(&ctx); + else if (!strcmp(cmd, "quiesce")) + ret = cmd_dev_quiesce(&ctx); else cmd_dev_help(argv[0]); diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 44ee1e4ac55b..e34508bf5798 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -19,7 +19,6 @@ #include <sys/inotify.h> #include <sys/wait.h> #include <sys/eventfd.h> -#include <sys/uio.h> #include <sys/ipc.h> #include <sys/shm.h> #include <linux/io_uring.h> @@ -85,6 +84,7 @@ struct dev_ctx { unsigned int all:1; unsigned int fg:1; unsigned int recovery:1; + unsigned int auto_zc_fallback:1; int _evtfd; int _shmid; @@ -92,6 +92,9 @@ struct dev_ctx { /* built from shmem, only for ublk_dump_dev() */ struct ublk_dev *shadow_dev; + /* for 'update_size' command */ + unsigned long long size; + union { struct stripe_ctx stripe; struct fault_inject_ctx fault_inject; @@ -116,6 +119,7 @@ struct ublk_io { #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) #define UBLKSRV_IO_FREE (1UL << 2) #define UBLKSRV_NEED_GET_DATA (1UL << 3) +#define UBLKSRV_NEED_REG_BUF (1UL << 4) unsigned short flags; unsigned short refs; /* used by target code only */ @@ -141,6 +145,9 @@ struct ublk_tgt_ops { */ void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); void (*usage)(const struct ublk_tgt_ops *ops); + + /* return buffer index for UBLK_F_AUTO_BUF_REG */ + unsigned short (*buf_index)(const struct ublk_queue *, int tag); }; struct ublk_tgt { @@ -169,6 +176,8 @@ struct ublk_queue { #define UBLKSRV_QUEUE_IDLE (1U << 1) #define UBLKSRV_NO_BUF (1U << 2) #define UBLKSRV_ZC (1U << 3) +#define UBLKSRV_AUTO_BUF_REG (1U << 4) +#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5) unsigned state; pid_t tid; pthread_t thread; @@ -204,6 +213,12 @@ struct ublk_dev { extern unsigned int ublk_dbg_mask; extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag); + +static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod) +{ + return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF); +} + static inline int is_target_io(__u64 user_data) { return (user_data & (1ULL << 63)) != 0; @@ -388,6 +403,11 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q) return q->state & UBLKSRV_ZC; } +static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q) +{ + return q->state & UBLKSRV_AUTO_BUF_REG; +} + extern const struct ublk_tgt_ops null_tgt_ops; extern const struct ublk_tgt_ops loop_tgt_ops; extern const struct ublk_tgt_ops stripe_tgt_ops; diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index 91fec3690d4b..44aca31cf2b0 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -42,10 +42,22 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) return 0; } +static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod, + struct io_uring_sqe *sqe) +{ + unsigned ublk_op = ublksrv_get_op(iod); + + io_uring_prep_nop(sqe); + sqe->buf_index = tag; + sqe->flags |= IOSQE_FIXED_FILE; + sqe->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT; + sqe->len = iod->nr_sectors << 9; /* injected result */ + sqe->user_data = build_user_data(tag, ublk_op, 0, 1); +} + static int null_queue_zc_io(struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); - unsigned ublk_op = ublksrv_get_op(iod); struct io_uring_sqe *sqe[3]; ublk_queue_alloc_sqes(q, sqe, 3); @@ -55,12 +67,8 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag) ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; - io_uring_prep_nop(sqe[1]); - sqe[1]->buf_index = tag; - sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; - sqe[1]->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT; - sqe[1]->len = iod->nr_sectors << 9; /* injected result */ - sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1); + __setup_nop_io(tag, iod, sqe[1]); + sqe[1]->flags |= IOSQE_IO_HARDLINK; io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1); @@ -69,6 +77,16 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag) return 2; } +static int null_queue_auto_zc_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + struct io_uring_sqe *sqe[1]; + + ublk_queue_alloc_sqes(q, sqe, 1); + __setup_nop_io(tag, iod, sqe[0]); + return 1; +} + static void ublk_null_io_done(struct ublk_queue *q, int tag, const struct io_uring_cqe *cqe) { @@ -94,22 +112,37 @@ static void ublk_null_io_done(struct ublk_queue *q, int tag, static int ublk_null_queue_io(struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); - int zc = ublk_queue_use_zc(q); + unsigned auto_zc = ublk_queue_use_auto_zc(q); + unsigned zc = ublk_queue_use_zc(q); int queued; - if (!zc) { + if (auto_zc && !ublk_io_auto_zc_fallback(iod)) + queued = null_queue_auto_zc_io(q, tag); + else if (zc) + queued = null_queue_zc_io(q, tag); + else { ublk_complete_io(q, tag, iod->nr_sectors << 9); return 0; } - - queued = null_queue_zc_io(q, tag); ublk_queued_tgt_io(q, tag, queued); return 0; } +/* + * return invalid buffer index for triggering auto buffer register failure, + * then UBLK_IO_RES_NEED_REG_BUF handling is covered + */ +static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag) +{ + if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + return (unsigned short)-1; + return tag; +} + const struct ublk_tgt_ops null_tgt_ops = { .name = "null", .init_tgt = ublk_null_tgt_init, .queue_io = ublk_null_queue_io, .tgt_io_done = ublk_null_io_done, + .buf_index = ublk_null_buf_index, }; diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 5dbd6392d83d..404a143bf3d6 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -70,7 +70,7 @@ static void free_stripe_array(struct stripe_array *s) } static void calculate_stripe_array(const struct stripe_conf *conf, - const struct ublksrv_io_desc *iod, struct stripe_array *s) + const struct ublksrv_io_desc *iod, struct stripe_array *s, void *base) { const unsigned shift = conf->shift - 9; const unsigned chunk_sects = 1 << shift; @@ -102,7 +102,7 @@ static void calculate_stripe_array(const struct stripe_conf *conf, } assert(this->nr_vec < this->cap); - this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done); + this->vec[this->nr_vec].iov_base = (void *)(base + done); this->vec[this->nr_vec++].iov_len = nr_sects << 9; start += nr_sects; @@ -126,15 +126,17 @@ static inline enum io_uring_op stripe_to_uring_op( static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) { const struct stripe_conf *conf = get_chunk_shift(q); - int zc = !!(ublk_queue_use_zc(q) != 0); - enum io_uring_op op = stripe_to_uring_op(iod, zc); + unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0); + unsigned zc = (ublk_queue_use_zc(q) != 0); + enum io_uring_op op = stripe_to_uring_op(iod, zc | auto_zc); struct io_uring_sqe *sqe[NR_STRIPE]; struct stripe_array *s = alloc_stripe_array(conf, iod); struct ublk_io *io = ublk_get_io(q, tag); int i, extra = zc ? 2 : 0; + void *base = (zc | auto_zc) ? NULL : (void *)iod->addr; io->private_data = s; - calculate_stripe_array(conf, iod, s); + calculate_stripe_array(conf, iod, s, base); ublk_queue_alloc_sqes(q, sqe, s->nr + extra); @@ -153,12 +155,11 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_ (void *)t->vec, t->nr_vec, t->start << 9); - if (zc) { + io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + if (auto_zc || zc) { sqe[i]->buf_index = tag; - io_uring_sqe_set_flags(sqe[i], - IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK); - } else { - io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + if (zc) + sqe[i]->flags |= IOSQE_IO_HARDLINK; } /* bit63 marks us as tgt io */ sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1); @@ -287,6 +288,11 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) loff_t bytes = 0; int ret, i, mul = 1; + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + if ((chunk_size & (chunk_size - 1)) || !chunk_size) { ublk_err("invalid chunk size %u\n", chunk_size); return -EINVAL; diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index a81210ca3e99..0145569ee7e9 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -23,6 +23,11 @@ _get_disk_dev_t() { echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) } +_get_disk_size() +{ + lsblk -b -o SIZE -n "$1" +} + _run_fio_verify_io() { fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \ --bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \ @@ -215,6 +220,26 @@ _recover_ublk_dev() { echo "$state" } +# quiesce device and return ublk device state +__ublk_quiesce_dev() +{ + local dev_id=$1 + local exp_state=$2 + local state + + if ! ${UBLK_PROG} quiesce -n "${dev_id}"; then + state=$(_get_ublk_dev_state "${dev_id}") + return "$state" + fi + + for ((j=0;j<50;j++)); do + state=$(_get_ublk_dev_state "${dev_id}") + [ "$state" == "$exp_state" ] && break + sleep 1 + done + echo "$state" +} + # kill the ublk daemon and return ublk device state __ublk_kill_daemon() { @@ -251,7 +276,7 @@ __run_io_and_remove() local kill_server=$3 fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ - --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ + --rw=randrw --norandommap --iodepth=256 --size="${size}" --numjobs="$(nproc)" \ --runtime=20 --time_based > /dev/null 2>&1 & sleep 2 if [ "${kill_server}" = "yes" ]; then @@ -303,20 +328,26 @@ run_io_and_kill_daemon() run_io_and_recover() { + local action=$1 local state local dev_id + shift 1 dev_id=$(_add_ublk_dev "$@") _check_add_dev "$TID" $? fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ - --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ + --rw=randread --iodepth=256 --size="${size}" --numjobs=4 \ --runtime=20 --time_based > /dev/null 2>&1 & sleep 4 - state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") + if [ "$action" == "kill_daemon" ]; then + state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") + elif [ "$action" == "quiesce_dev" ]; then + state=$(__ublk_quiesce_dev "${dev_id}" "QUIESCED") + fi if [ "$state" != "QUIESCED" ]; then - echo "device isn't quiesced($state) after killing daemon" + echo "device isn't quiesced($state) after $action" return 255 fi diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh index 8a3bc080c577..8b533217d4a1 100755 --- a/tools/testing/selftests/ublk/test_generic_04.sh +++ b/tools/testing/selftests/ublk/test_generic_04.sh @@ -8,7 +8,7 @@ ERR_CODE=0 ublk_run_recover_test() { - run_io_and_recover "$@" + run_io_and_recover "kill_daemon" "$@" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then echo "$TID failure: $*" diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh index 3bb00a347402..398e9e2b58e1 100755 --- a/tools/testing/selftests/ublk/test_generic_05.sh +++ b/tools/testing/selftests/ublk/test_generic_05.sh @@ -8,7 +8,7 @@ ERR_CODE=0 ublk_run_recover_test() { - run_io_and_recover "$@" + run_io_and_recover "kill_daemon" "$@" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then echo "$TID failure: $*" diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh index b67230c42c84..fd42062b7b76 100755 --- a/tools/testing/selftests/ublk/test_generic_06.sh +++ b/tools/testing/selftests/ublk/test_generic_06.sh @@ -17,7 +17,7 @@ STARTTIME=${SECONDS} dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 & dd_pid=$! -__ublk_kill_daemon ${dev_id} "DEAD" +__ublk_kill_daemon ${dev_id} "DEAD" >/dev/null wait $dd_pid dd_exitcode=$? diff --git a/tools/testing/selftests/ublk/test_generic_08.sh b/tools/testing/selftests/ublk/test_generic_08.sh new file mode 100755 index 000000000000..b222f3a77e12 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_08.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_08" +ERR_CODE=0 + +if ! _have_feature "AUTO_BUF_REG"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_AUTO_BUF_REG" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t loop -q 2 --auto_zc "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +if ! _mkfs_mount_test /dev/ublkb"${dev_id}"; then + _cleanup_test "generic" + _show_result $TID 255 +fi + +dev_id=$(_add_ublk_dev -t stripe --auto_zc "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "generic" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_09.sh b/tools/testing/selftests/ublk/test_generic_09.sh new file mode 100755 index 000000000000..bb6f77ca5522 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_09.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_09" +ERR_CODE=0 + +if ! _have_feature "AUTO_BUF_REG"; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "basic IO test" + +dev_id=$(_add_ublk_dev -t null -z --auto_zc --auto_zc_fallback) +_check_add_dev $TID $? + +# run fio over the two disks +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test "null" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_10.sh b/tools/testing/selftests/ublk/test_generic_10.sh new file mode 100755 index 000000000000..abc11c3d416b --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_10.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_10" +ERR_CODE=0 + +if ! _have_feature "UPDATE_SIZE"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "check update size" + +dev_id=$(_add_ublk_dev -t null) +_check_add_dev $TID $? + +size=$(_get_disk_size /dev/ublkb"${dev_id}") +size=$(( size / 2 )) +if ! "$UBLK_PROG" update_size -n "$dev_id" -s "$size"; then + ERR_CODE=255 +fi + +new_size=$(_get_disk_size /dev/ublkb"${dev_id}") +if [ "$new_size" != "$size" ]; then + ERR_CODE=255 +fi + +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_11.sh b/tools/testing/selftests/ublk/test_generic_11.sh new file mode 100755 index 000000000000..a00357a5ec6b --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_11.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_11" +ERR_CODE=0 + +ublk_run_quiesce_recover() +{ + run_io_and_recover "quiesce_dev" "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_feature "QUIESCE"; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "quiesce" "basic quiesce & recover function verification" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_quiesce_recover -t null -q 2 -r 1 & +ublk_run_quiesce_recover -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" & +ublk_run_quiesce_recover -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_quiesce_recover -t null -q 2 -r 1 -i 1 & +ublk_run_quiesce_recover -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_quiesce_recover -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "quiesce" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 1a9065125ae1..4bdd921081e5 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -25,10 +25,12 @@ _create_backfile 0 256M _create_backfile 1 128M _create_backfile 2 128M -ublk_io_and_kill_daemon 8G -t null -q 4 & -ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & -ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & -wait +for nr_queue in 1 4; do + ublk_io_and_kill_daemon 8G -t null -q "$nr_queue" & + ublk_io_and_kill_daemon 256M -t loop -q "$nr_queue" "${UBLK_BACKFILES[0]}" & + ublk_io_and_kill_daemon 256M -t stripe -q "$nr_queue" "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + wait +done _cleanup_test "stress" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh index e0854f71d35b..7d728ce50774 100755 --- a/tools/testing/selftests/ublk/test_stress_03.sh +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -32,6 +32,13 @@ _create_backfile 2 128M ublk_io_and_remove 8G -t null -q 4 -z & ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + +if _have_feature "AUTO_BUF_REG"; then + ublk_io_and_remove 8G -t null -q 4 --auto_zc & + ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & + ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & +fi wait _cleanup_test "stress" diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh index 1798a98387e8..9bcfa64ea1f0 100755 --- a/tools/testing/selftests/ublk/test_stress_04.sh +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -31,6 +31,13 @@ _create_backfile 2 128M ublk_io_and_kill_daemon 8G -t null -q 4 -z & ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + +if _have_feature "AUTO_BUF_REG"; then + ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc & + ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & + ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & +fi wait _cleanup_test "stress" diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh index 88601b48f1cd..bcfc904cefc6 100755 --- a/tools/testing/selftests/ublk/test_stress_05.sh +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -60,5 +60,14 @@ if _have_feature "ZERO_COPY"; then done fi +if _have_feature "AUTO_BUF_REG"; then + for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g --auto_zc -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + ublk_io_and_remove 8G -t null -q 4 -g -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" & + wait + done +fi + _cleanup_test "stress" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 28422c32cc8f..f703fcfe9f7c 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -19,7 +19,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \ - corrupt_xstate_header amx lam test_shadow_stack avx + corrupt_xstate_header amx lam test_shadow_stack avx apx # Some selftests require 32bit support enabled also on 64bit systems TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall @@ -136,3 +136,4 @@ $(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack $(OUTPUT)/avx_64: CFLAGS += -mno-avx -mno-avx512f $(OUTPUT)/amx_64: EXTRA_FILES += xstate.c $(OUTPUT)/avx_64: EXTRA_FILES += xstate.c +$(OUTPUT)/apx_64: EXTRA_FILES += xstate.c diff --git a/tools/testing/selftests/x86/apx.c b/tools/testing/selftests/x86/apx.c new file mode 100644 index 000000000000..d9c8d41b8c5a --- /dev/null +++ b/tools/testing/selftests/x86/apx.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include "xstate.h" + +int main(void) +{ + test_xstate(XFEATURE_APX); +} diff --git a/tools/testing/selftests/x86/bugs/Makefile b/tools/testing/selftests/x86/bugs/Makefile new file mode 100644 index 000000000000..8ff2d7226c7f --- /dev/null +++ b/tools/testing/selftests/x86/bugs/Makefile @@ -0,0 +1,3 @@ +TEST_PROGS := its_sysfs.py its_permutations.py its_indirect_alignment.py its_ret_alignment.py +TEST_FILES := common.py +include ../../lib.mk diff --git a/tools/testing/selftests/x86/bugs/common.py b/tools/testing/selftests/x86/bugs/common.py new file mode 100755 index 000000000000..2f9664a80617 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/common.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# This contains kselftest framework adapted common functions for testing +# mitigation for x86 bugs. + +import os, sys, re, shutil + +sys.path.insert(0, '../../kselftest') +import ksft + +def read_file(path): + if not os.path.exists(path): + return None + with open(path, 'r') as file: + return file.read().strip() + +def cpuinfo_has(arg): + cpuinfo = read_file('/proc/cpuinfo') + if arg in cpuinfo: + return True + return False + +def cmdline_has(arg): + cmdline = read_file('/proc/cmdline') + if arg in cmdline: + return True + return False + +def cmdline_has_either(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg in cmdline: + return True + return False + +def cmdline_has_none(args): + return not cmdline_has_either(args) + +def cmdline_has_all(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg not in cmdline: + return False + return True + +def get_sysfs(bug): + return read_file("/sys/devices/system/cpu/vulnerabilities/" + bug) + +def sysfs_has(bug, mitigation): + status = get_sysfs(bug) + if mitigation in status: + return True + return False + +def sysfs_has_either(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if sysfs_has(bug, mitigation): + return True + return False + +def sysfs_has_none(bugs, mitigations): + return not sysfs_has_either(bugs, mitigations) + +def sysfs_has_all(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if not sysfs_has(bug, mitigation): + return False + return True + +def bug_check_pass(bug, found): + ksft.print_msg(f"\nFound: {found}") + # ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_pass(f'{bug}: {found}') + +def bug_check_fail(bug, found, expected): + ksft.print_msg(f'\nFound:\t {found}') + ksft.print_msg(f'Expected:\t {expected}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def bug_status_unknown(bug, found): + ksft.print_msg(f'\nUnknown status: {found}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def basic_checks_sufficient(bug, mitigation): + if not mitigation: + bug_status_unknown(bug, "None") + return True + elif mitigation == "Not affected": + ksft.test_result_pass(bug) + return True + elif mitigation == "Vulnerable": + if cmdline_has_either([f'{bug}=off', 'mitigations=off']): + bug_check_pass(bug, mitigation) + return True + return False + +def get_section_info(vmlinux, section_name): + from elftools.elf.elffile import ELFFile + with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + section = elffile.get_section_by_name(section_name) + if section is None: + ksft.print_msg("Available sections in vmlinux:") + for sec in elffile.iter_sections(): + ksft.print_msg(sec.name) + raise ValueError(f"Section {section_name} not found in {vmlinux}") + return section['sh_addr'], section['sh_offset'], section['sh_size'] + +def get_patch_sites(vmlinux, offset, size): + import struct + output = [] + with open(vmlinux, 'rb') as f: + f.seek(offset) + i = 0 + while i < size: + data = f.read(4) # s32 + if not data: + break + sym_offset = struct.unpack('<i', data)[0] + i + i += 4 + output.append(sym_offset) + return output + +def get_instruction_from_vmlinux(elffile, section, virtual_address, target_address): + from capstone import Cs, CS_ARCH_X86, CS_MODE_64 + section_start = section['sh_addr'] + section_end = section_start + section['sh_size'] + + if not (section_start <= target_address < section_end): + return None + + offset = target_address - section_start + code = section.data()[offset:offset + 16] + + cap = init_capstone() + for instruction in cap.disasm(code, target_address): + if instruction.address == target_address: + return instruction + return None + +def init_capstone(): + from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_OPT_SYNTAX_ATT + cap = Cs(CS_ARCH_X86, CS_MODE_64) + cap.syntax = CS_OPT_SYNTAX_ATT + return cap + +def get_runtime_kernel(): + import drgn + return drgn.program_from_kernel() + +def check_dependencies_or_skip(modules, script_name="unknown test"): + for mod in modules: + try: + __import__(mod) + except ImportError: + ksft.test_result_skip(f"Skipping {script_name}: missing module '{mod}'") + ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_indirect_alignment.py b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py new file mode 100755 index 000000000000..cdc33ae6a91c --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) mitigation. +# +# Test if indirect CALL/JMP are correctly patched by evaluating +# the vmlinux .retpoline_sites in /proc/kcore. + +# Install dependencies +# add-apt-repository ppa:michel-slm/kernel-utils +# apt update +# apt install -y python3-drgn python3-pyelftools python3-capstone +# +# Best to copy the vmlinux at a standard location: +# mkdir -p /usr/lib/debug/lib/modules/$(uname -r) +# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux +# +# Usage: ./its_indirect_alignment.py [vmlinux] + +import os, sys, argparse +from pathlib import Path + +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" + +mitigation = c.get_sysfs(bug) +if not mitigation or "Aligned branch/return thunks" not in mitigation: + ksft.test_result_skip("Skipping its_indirect_alignment.py: Aligned branch/return thunks not enabled") + ksft.finished() + +if c.sysfs_has("spectre_v2", "Retpolines"): + ksft.test_result_skip("Skipping its_indirect_alignment.py: Retpolines deployed") + ksft.finished() + +c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_indirect_alignment.py") + +from elftools.elf.elffile import ELFFile +from drgn.helpers.common.memory import identify_address + +cap = c.init_capstone() + +if len(os.sys.argv) > 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at argument path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +retpolines_start_vmlinux, retpolines_sec_offset, size = c.get_section_info(vmlinux, '.retpoline_sites') +ksft.print_msg(f"vmlinux: Section .retpoline_sites (0x{retpolines_start_vmlinux:x}) found at 0x{retpolines_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, retpolines_sec_offset, size) +total_retpoline_tests = len(sites_offset) +ksft.print_msg(f"Found {total_retpoline_tests} retpoline sites") + +prog = c.get_runtime_kernel() +retpolines_start_kcore = prog.symbol('__retpoline_sites').address +ksft.print_msg(f'kcore: __retpoline_sites: 0x{retpolines_start_kcore:x}') + +x86_indirect_its_thunk_r15 = prog.symbol('__x86_indirect_its_thunk_r15').address +ksft.print_msg(f'kcore: __x86_indirect_its_thunk_r15: 0x{x86_indirect_its_thunk_r15:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(0, len(sites_offset)): + site = retpolines_start_kcore + sites_offset[i] + vmlinux_site = retpolines_start_vmlinux + sites_offset[i] + passed = unknown = failed = False + try: + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + operand = kcore_insn.op_str + insn_end = site + kcore_insn.size - 1 # TODO handle Jcc.32 __x86_indirect_thunk_\reg + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {identify_address(prog, site)} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if (site & 0x20) ^ (insn_end & 0x20): + ksft.print_msg(f"\tSite at safe/unsafe boundary: {str(kcore_insn.bytes)} {kcore_insn.mnemonic} {operand}") + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if operand.startswith('0xffffffff'): + thunk = int(operand, 16) + if thunk > x86_indirect_its_thunk_r15: + insn_at_thunk = list(cap.disasm(prog.read(thunk, 16), thunk))[0] + operand += ' -> ' + insn_at_thunk.mnemonic + ' ' + insn_at_thunk.op_str + ' <dynamic-thunk?>' + if 'jmp' in insn_at_thunk.mnemonic and thunk & 0x20: + ksft.print_msg(f"\tPASSED: Found {operand} at safe address") + passed = True + if not passed: + if kcore_insn.operands[0].type == capstone.CS_OP_IMM: + operand += ' <' + prog.symbol(int(operand, 16)) + '>' + if '__x86_indirect_its_thunk_' in operand: + ksft.print_msg(f"\tPASSED: Found {operand}") + else: + ksft.print_msg(f"\tPASSED: Found direct branch: {kcore_insn}, ITS thunk not required.") + passed = True + else: + unknown = True + if passed: + tests_passed += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: unexpected operand: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.bytes} {kcore_insn.mnemonic} {operand}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASS: \t{tests_passed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"FAIL: \t{tests_failed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_retpoline_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed") +else: + ksft.test_result_fail(f"{tests_failed} ITS return thunk sites failed") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_permutations.py b/tools/testing/selftests/x86/bugs/its_permutations.py new file mode 100755 index 000000000000..3204f4728c62 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_permutations.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) cmdline permutations with other bugs +# like spectre_v2 and retbleed. + +import os, sys, subprocess, itertools, re, shutil + +test_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, test_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) + +if not mitigation or "Not affected" in mitigation: + ksft.test_result_skip("Skipping its_permutations.py: not applicable") + ksft.finished() + +if shutil.which('vng') is None: + ksft.test_result_skip("Skipping its_permutations.py: virtme-ng ('vng') not found in PATH.") + ksft.finished() + +TEST = f"{test_dir}/its_sysfs.py" +default_kparam = ['clearcpuid=hypervisor', 'panic=5', 'panic_on_warn=1', 'oops=panic', 'nmi_watchdog=1', 'hung_task_panic=1'] + +DEBUG = " -v " + +# Install dependencies +# https://github.com/arighi/virtme-ng +# apt install virtme-ng +BOOT_CMD = f"vng --run {test_dir}/../../../../../arch/x86/boot/bzImage " +#BOOT_CMD += DEBUG + +bug = "indirect_target_selection" + +input_options = { + 'indirect_target_selection' : ['off', 'on', 'stuff', 'vmexit'], + 'retbleed' : ['off', 'stuff', 'auto'], + 'spectre_v2' : ['off', 'on', 'eibrs', 'retpoline', 'ibrs', 'eibrs,retpoline'], +} + +def pretty_print(output): + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + + # Define patterns and their corresponding colors + patterns = { + r"^ok \d+": OKGREEN, + r"^not ok \d+": FAIL, + r"^# Testing .*": OKBLUE, + r"^# Found: .*": WARNING, + r"^# Totals: .*": BOLD, + r"pass:([1-9]\d*)": OKGREEN, + r"fail:([1-9]\d*)": FAIL, + r"skip:([1-9]\d*)": WARNING, + } + + # Apply colors based on patterns + for pattern, color in patterns.items(): + output = re.sub(pattern, lambda match: f"{color}{match.group(0)}{ENDC}", output, flags=re.MULTILINE) + + print(output) + +combinations = list(itertools.product(*input_options.values())) +ksft.print_header() +ksft.set_plan(len(combinations)) + +logs = "" + +for combination in combinations: + append = "" + log = "" + for p in default_kparam: + append += f' --append={p}' + command = BOOT_CMD + append + test_params = "" + for i, key in enumerate(input_options.keys()): + param = f'{key}={combination[i]}' + test_params += f' {param}' + command += f" --append={param}" + command += f" -- {TEST}" + test_name = f"{bug} {test_params}" + pretty_print(f'# Testing {test_name}') + t = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + t.wait() + output, _ = t.communicate() + if t.returncode == 0: + ksft.test_result_pass(test_name) + else: + ksft.test_result_fail(test_name) + output = output.decode() + log += f" {output}" + pretty_print(log) + logs += output + "\n" + +# Optionally use tappy to parse the output +# apt install python3-tappy +with open("logs.txt", "w") as f: + f.write(logs) + +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_ret_alignment.py b/tools/testing/selftests/x86/bugs/its_ret_alignment.py new file mode 100755 index 000000000000..f40078d9f6ff --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_ret_alignment.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) mitigation. +# +# Tests if the RETs are correctly patched by evaluating the +# vmlinux .return_sites in /proc/kcore. +# +# Install dependencies +# add-apt-repository ppa:michel-slm/kernel-utils +# apt update +# apt install -y python3-drgn python3-pyelftools python3-capstone +# +# Run on target machine +# mkdir -p /usr/lib/debug/lib/modules/$(uname -r) +# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux +# +# Usage: ./its_ret_alignment.py + +import os, sys, argparse +from pathlib import Path + +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) +if not mitigation or "Aligned branch/return thunks" not in mitigation: + ksft.test_result_skip("Skipping its_ret_alignment.py: Aligned branch/return thunks not enabled") + ksft.finished() + +c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_ret_alignment.py") + +from elftools.elf.elffile import ELFFile +from drgn.helpers.common.memory import identify_address + +cap = c.init_capstone() + +if len(os.sys.argv) > 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at user-supplied path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +rethunks_start_vmlinux, rethunks_sec_offset, size = c.get_section_info(vmlinux, '.return_sites') +ksft.print_msg(f"vmlinux: Section .return_sites (0x{rethunks_start_vmlinux:x}) found at 0x{rethunks_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, rethunks_sec_offset, size) +total_rethunk_tests = len(sites_offset) +ksft.print_msg(f"Found {total_rethunk_tests} rethunk sites") + +prog = c.get_runtime_kernel() +rethunks_start_kcore = prog.symbol('__return_sites').address +ksft.print_msg(f'kcore: __rethunk_sites: 0x{rethunks_start_kcore:x}') + +its_return_thunk = prog.symbol('its_return_thunk').address +ksft.print_msg(f'kcore: its_return_thunk: 0x{its_return_thunk:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 +tests_skipped = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(len(sites_offset)): + site = rethunks_start_kcore + sites_offset[i] + vmlinux_site = rethunks_start_vmlinux + sites_offset[i] + try: + passed = unknown = failed = skipped = False + + symbol = identify_address(prog, site) + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + + insn_end = site + kcore_insn.size - 1 + + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {symbol} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if "jmp" in kcore_insn.mnemonic: + passed = True + elif "ret" not in kcore_insn.mnemonic: + skipped = True + + if passed: + ksft.print_msg(f"\tPASSED: Found {kcore_insn.mnemonic} {kcore_insn.op_str}") + tests_passed += 1 + elif skipped: + ksft.print_msg(f"\tSKIPPED: Found '{kcore_insn.mnemonic}'") + tests_skipped += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: An unknown instruction: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.mnemonic} {kcore_insn.op_str}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASSED: \t{tests_passed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"FAILED: \t{tests_failed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"SKIPPED: \t{tests_skipped} \t/ {total_rethunk_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_rethunk_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed.") +else: + ksft.test_result_fail(f"{tests_failed} failed sites need ITS return thunks.") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_sysfs.py b/tools/testing/selftests/x86/bugs/its_sysfs.py new file mode 100755 index 000000000000..7bca81f2f606 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_sysfs.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for Indirect Target Selection(ITS) mitigation sysfs status. + +import sys, os, re +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft + +from common import * + +bug = "indirect_target_selection" +mitigation = get_sysfs(bug) + +ITS_MITIGATION_ALIGNED_THUNKS = "Mitigation: Aligned branch/return thunks" +ITS_MITIGATION_RETPOLINE_STUFF = "Mitigation: Retpolines, Stuffing RSB" +ITS_MITIGATION_VMEXIT_ONLY = "Mitigation: Vulnerable, KVM: Not affected" +ITS_MITIGATION_VULNERABLE = "Vulnerable" + +def check_mitigation(): + if mitigation == ITS_MITIGATION_ALIGNED_THUNKS: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF) + return + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_pass(bug, ITS_MITIGATION_ALIGNED_THUNKS) + return + + if mitigation == ITS_MITIGATION_RETPOLINE_STUFF: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + if sysfs_has('retbleed', 'Stuffing'): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + bug_check_fail(bug, ITS_MITIGATION_RETPOLINE_STUFF, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VMEXIT_ONLY: + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_pass(bug, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_fail(bug, ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VULNERABLE: + if sysfs_has("spectre_v2", "Vulnerable"): + bug_check_pass(bug, ITS_MITIGATION_VULNERABLE) + else: + bug_check_fail(bug, "Mitigation", ITS_MITIGATION_VULNERABLE) + + bug_status_unknown(bug, mitigation) + return + +ksft.print_header() +ksft.set_plan(1) +ksft.print_msg(f'{bug}: {mitigation} ...') + +if not basic_checks_sufficient(bug, mitigation): + check_mitigation() + +ksft.finished() diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 18d736640ece..0873b0e5f48b 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -682,7 +682,7 @@ int do_uring(unsigned long lam) return 1; if (fstat(file_fd, &st) < 0) - return 1; + goto cleanup; off_t file_sz = st.st_size; @@ -690,7 +690,7 @@ int do_uring(unsigned long lam) fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks); if (!fi) - return 1; + goto cleanup; fi->file_sz = file_sz; fi->file_fd = file_fd; @@ -698,7 +698,7 @@ int do_uring(unsigned long lam) ring = malloc(sizeof(*ring)); if (!ring) { free(fi); - return 1; + goto cleanup; } memset(ring, 0, sizeof(struct io_ring)); @@ -729,6 +729,8 @@ out: } free(fi); +cleanup: + close(file_fd); return ret; } @@ -1189,6 +1191,7 @@ void *allocate_dsa_pasid(void) wq = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); + close(fd); if (wq == MAP_FAILED) perror("mmap"); diff --git a/tools/testing/selftests/x86/xstate.c b/tools/testing/selftests/x86/xstate.c index 23c1d6c964ea..97fe4bd8bc77 100644 --- a/tools/testing/selftests/x86/xstate.c +++ b/tools/testing/selftests/x86/xstate.c @@ -31,7 +31,8 @@ (1 << XFEATURE_OPMASK) | \ (1 << XFEATURE_ZMM_Hi256) | \ (1 << XFEATURE_Hi16_ZMM) | \ - (1 << XFEATURE_XTILEDATA)) + (1 << XFEATURE_XTILEDATA) | \ + (1 << XFEATURE_APX)) static inline uint64_t xgetbv(uint32_t index) { diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h index 42af36ec852f..e91e3092b5d2 100644 --- a/tools/testing/selftests/x86/xstate.h +++ b/tools/testing/selftests/x86/xstate.h @@ -33,6 +33,7 @@ enum xfeature { XFEATURE_RSRVD_COMP_16, XFEATURE_XTILECFG, XFEATURE_XTILEDATA, + XFEATURE_APX, XFEATURE_MAX, }; @@ -59,6 +60,7 @@ static const char *xfeature_names[] = "unknown xstate feature", "AMX Tile config", "AMX Tile data", + "APX registers", "unknown xstate feature", }; diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index d0f6d253ac72..613551132a96 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1264,21 +1264,25 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type) send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); control_expectln("RECEIVED"); - ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); - if (ret < 0) { - if (errno == EOPNOTSUPP) { - fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); - } else { + /* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though + * the "RECEIVED" message means that the other side has received the + * data, there can be a delay in our kernel before updating the "unsent + * bytes" counter. Repeat SIOCOUTQ until it returns 0. + */ + timeout_begin(TIMEOUT); + do { + ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + if (ret < 0) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + break; + } perror("ioctl"); exit(EXIT_FAILURE); } - } else if (ret == 0 && sock_bytes_unsent != 0) { - fprintf(stderr, - "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n", - sock_bytes_unsent); - exit(EXIT_FAILURE); - } - + timeout_check("SIOCOUTQ"); + } while (sock_bytes_unsent != 0); + timeout_end(); close(fd); } |