498 files changed, 47293 insertions, 9207 deletions
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 139d5e87dc95..b35d954d50c3 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -245,7 +245,7 @@
 #define MIDR_FUJITSU_ERRATUM_010001_MASK	(~MIDR_CPU_VAR_REV(1, 0))
 #define TCR_CLEAR_FUJITSU_ERRATUM_010001	(TCR_NFD1 | TCR_NFD0)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 #include <asm/sysreg.h>
 
@@ -338,6 +338,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void)
 {
 	return read_cpuid(CTR_EL0);
 }
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif
diff --git a/tools/arch/arm64/include/asm/esr.h b/tools/arch/arm64/include/asm/esr.h
index bd592ca81571..bbfbd1497a2f 100644
--- a/tools/arch/arm64/include/asm/esr.h
+++ b/tools/arch/arm64/include/asm/esr.h
@@ -385,7 +385,7 @@
 #define ESR_ELx_MOPS_ISS_SRCREG(esr)	(((esr) & (UL(0x1f) << 5)) >> 5)
 #define ESR_ELx_MOPS_ISS_SIZEREG(esr)	(((esr) & (UL(0x1f) << 0)) >> 0)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 #include <asm/types.h>
 
 static inline unsigned long esr_brk_comment(unsigned long esr)
@@ -450,6 +450,6 @@ static inline bool esr_iss_is_eretab(unsigned long esr)
 }
 
 const char *esr_get_class_string(unsigned long esr);
-#endif /* __ASSEMBLY */
+#endif /* __ASSEMBLER__ */
 
 #endif /* __ASM_ESR_H */
diff --git a/tools/arch/arm64/include/asm/gpr-num.h b/tools/arch/arm64/include/asm/gpr-num.h
index 05da4a7c5788..a114e4f8209b 100644
--- a/tools/arch/arm64/include/asm/gpr-num.h
+++ b/tools/arch/arm64/include/asm/gpr-num.h
@@ -2,7 +2,7 @@
 #ifndef __ASM_GPR_NUM_H
 #define __ASM_GPR_NUM_H
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
 	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
 	.equ	.L__gpr_num_x\num, \num
@@ -11,7 +11,7 @@
 	.equ	.L__gpr_num_xzr, 31
 	.equ	.L__gpr_num_wzr, 31
 
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
 
 #define __DEFINE_ASM_GPR_NUMS					\
 "	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \
@@ -21,6 +21,6 @@
 "	.equ	.L__gpr_num_xzr, 31\n"				\
 "	.equ	.L__gpr_num_wzr, 31\n"
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* __ASM_GPR_NUM_H */
diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h
index 65f2759ea27a..178b7322bf04 100644
--- a/tools/arch/arm64/include/asm/sysreg.h
+++ b/tools/arch/arm64/include/asm/sysreg.h
@@ -51,7 +51,7 @@
 
 #ifndef CONFIG_BROKEN_GAS_INST
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 // The space separator is omitted so that __emit_inst(x) can be parsed as
 // either an assembler directive or an assembler macro argument.
 #define __emit_inst(x)			.inst(x)
@@ -70,11 +70,11 @@
 					 (((x) >> 24) & 0x000000ff))
 #endif	/* CONFIG_CPU_BIG_ENDIAN */
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 #define __emit_inst(x)			.long __INSTR_BSWAP(x)
-#else  /* __ASSEMBLY__ */
+#else  /* __ASSEMBLER__ */
 #define __emit_inst(x)			".long " __stringify(__INSTR_BSWAP(x)) "\n\t"
-#endif	/* __ASSEMBLY__ */
+#endif	/* __ASSEMBLER__ */
 
 #endif	/* CONFIG_BROKEN_GAS_INST */
 
@@ -1078,9 +1078,7 @@
 #define GCS_CAP(x)	((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
 					       GCS_CAP_VALID_TOKEN)
 
-#define ARM64_FEATURE_FIELD_BITS	4
-
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 
 	.macro	mrs_s, rt, sreg
 	 __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt))
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index ed5f3892674c..a792a599b9d6 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -31,7 +31,7 @@
 #define KVM_SPSR_FIQ	4
 #define KVM_NR_SPSR	5
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 #include <linux/psci.h>
 #include <linux/types.h>
 #include <asm/ptrace.h>
diff --git a/tools/arch/riscv/include/asm/csr.h b/tools/arch/riscv/include/asm/csr.h
index 56d7367ee344..21d8cee04638 100644
--- a/tools/arch/riscv/include/asm/csr.h
+++ b/tools/arch/riscv/include/asm/csr.h
@@ -167,7 +167,8 @@
 #define VSIP_TO_HVIP_SHIFT	(IRQ_VS_SOFT - IRQ_S_SOFT)
 #define VSIP_VALID_MASK		((_AC(1, UL) << IRQ_S_SOFT) | \
 				 (_AC(1, UL) << IRQ_S_TIMER) | \
-				 (_AC(1, UL) << IRQ_S_EXT))
+				 (_AC(1, UL) << IRQ_S_EXT) | \
+				 (_AC(1, UL) << IRQ_PMU_OVF))
 
 /* AIA CSR bits */
 #define TOPI_IID_SHIFT		16
@@ -280,7 +281,7 @@
 #define CSR_HPMCOUNTER30H	0xc9e
 #define CSR_HPMCOUNTER31H	0xc9f
 
-#define CSR_SSCOUNTOVF		0xda0
+#define CSR_SCOUNTOVF		0xda0
 
 #define CSR_SSTATUS		0x100
 #define CSR_SIE			0x104
diff --git a/tools/arch/s390/include/uapi/asm/bitsperlong.h b/tools/arch/s390/include/uapi/asm/bitsperlong.h
index d2bb620119bf..a226a1686a53 100644
--- a/tools/arch/s390/include/uapi/asm/bitsperlong.h
+++ b/tools/arch/s390/include/uapi/asm/bitsperlong.h
@@ -2,11 +2,7 @@
 #ifndef __ASM_S390_BITSPERLONG_H
 #define __ASM_S390_BITSPERLONG_H
 
-#ifndef __s390x__
-#define __BITS_PER_LONG 32
-#else
 #define __BITS_PER_LONG 64
-#endif
 
 #include <asm-generic/bitsperlong.h>
 
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 06fc0479a23f..ccc01ad6ff7c 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -320,7 +320,7 @@
 #define X86_FEATURE_FSRS		(12*32+11) /* Fast short REP STOSB */
 #define X86_FEATURE_FSRC		(12*32+12) /* Fast short REP {CMPSB,SCASB} */
 #define X86_FEATURE_FRED		(12*32+17) /* "fred" Flexible Return and Event Delivery */
-#define X86_FEATURE_LKGS		(12*32+18) /* Load "kernel" (userspace) GS */
+#define X86_FEATURE_LKGS		(12*32+18) /* Like MOV_GS except MSR_KERNEL_GS_BASE = GS.base */
 #define X86_FEATURE_WRMSRNS		(12*32+19) /* Non-serializing WRMSR */
 #define X86_FEATURE_AMX_FP16		(12*32+21) /* AMX fp16 Support */
 #define X86_FEATURE_AVX_IFMA            (12*32+23) /* Support for VPMADD52[H,L]UQ */
@@ -407,9 +407,12 @@
 #define X86_FEATURE_ENQCMD		(16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */
 #define X86_FEATURE_SGX_LC		(16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */
 
-/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
+/*
+ * Linux-defined word for use with scattered/synthetic bits.
+ */
 #define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
 #define X86_FEATURE_SUCCOR		(17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
+
 #define X86_FEATURE_SMCA		(17*32+ 3) /* "smca" Scalable MCA */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
@@ -444,6 +447,7 @@
 #define X86_FEATURE_VM_PAGE_FLUSH	(19*32+ 2) /* VM Page Flush MSR is supported */
 #define X86_FEATURE_SEV_ES		(19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */
 #define X86_FEATURE_SEV_SNP		(19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_SNP_SECURE_TSC	(19*32+ 8) /* SEV-SNP Secure TSC */
 #define X86_FEATURE_V_TSC_AUX		(19*32+ 9) /* Virtual TSC_AUX */
 #define X86_FEATURE_SME_COHERENT	(19*32+10) /* hardware-enforced cache coherency */
 #define X86_FEATURE_DEBUG_SWAP		(19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
@@ -495,6 +499,9 @@
 #define X86_FEATURE_TSA_SQ_NO		(21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
 #define X86_FEATURE_TSA_L1_NO		(21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
 #define X86_FEATURE_CLEAR_CPU_BUF_VM	(21*32+13) /* Clear CPU buffers using VERW before VMRUN */
+#define X86_FEATURE_IBPB_EXIT_TO_USER	(21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
+#define X86_FEATURE_ABMC		(21*32+15) /* Assignable Bandwidth Monitoring Counters */
+#define X86_FEATURE_MSR_IMM		(21*32+16) /* MSR immediate form instructions */
 
 /*
  * BUG word(s)
@@ -551,4 +558,5 @@
 #define X86_BUG_ITS			X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
 #define X86_BUG_ITS_NATIVE_ONLY		X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
 #define X86_BUG_TSA			X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
+#define X86_BUG_VMSCAPE			X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index c683d609934b..8f10f2943370 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -312,7 +312,6 @@ static inline int insn_offset_immediate(struct insn *insn)
 /**
  * for_each_insn_prefix() -- Iterate prefixes in the instruction
  * @insn: Pointer to struct insn.
- * @idx:  Index storage.
  * @prefix: Prefix byte.
  *
  * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
@@ -321,8 +320,8 @@ static inline int insn_offset_immediate(struct insn *insn)
  * Since prefixes.nbytes can be bigger than 4 if some prefixes
  * are repeated, it cannot be used for looping over the prefixes.
  */
-#define for_each_insn_prefix(insn, idx, prefix)	\
-	for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+#define for_each_insn_prefix(insn, prefix)	\
+	for (int idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
 
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index f627196eb796..9e1720d73244 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -315,9 +315,12 @@
 #define PERF_CAP_PT_IDX			16
 
 #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
+
+#define PERF_CAP_LBR_FMT		0x3f
 #define PERF_CAP_PEBS_TRAP		BIT_ULL(6)
 #define PERF_CAP_ARCH_REG		BIT_ULL(7)
 #define PERF_CAP_PEBS_FORMAT		0xf00
+#define PERF_CAP_FW_WRITES		BIT_ULL(13)
 #define PERF_CAP_PEBS_BASELINE		BIT_ULL(14)
 #define PERF_CAP_PEBS_TIMING_INFO	BIT_ULL(17)
 #define PERF_CAP_PEBS_MASK		(PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
@@ -633,6 +636,11 @@
 #define MSR_AMD_PPIN			0xc00102f1
 #define MSR_AMD64_CPUID_FN_7		0xc0011002
 #define MSR_AMD64_CPUID_FN_1		0xc0011004
+
+#define MSR_AMD64_CPUID_EXT_FEAT	0xc0011005
+#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT	54
+#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT	BIT_ULL(MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT)
+
 #define MSR_AMD64_LS_CFG		0xc0011020
 #define MSR_AMD64_DC_CFG		0xc0011022
 #define MSR_AMD64_TW_CFG		0xc0011023
@@ -701,8 +709,15 @@
 #define MSR_AMD64_SNP_VMSA_REG_PROT	BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
 #define MSR_AMD64_SNP_SMT_PROT_BIT	17
 #define MSR_AMD64_SNP_SMT_PROT		BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
-#define MSR_AMD64_SNP_RESV_BIT		18
+#define MSR_AMD64_SNP_SECURE_AVIC_BIT	18
+#define MSR_AMD64_SNP_SECURE_AVIC	BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT)
+#define MSR_AMD64_SNP_RESV_BIT		19
 #define MSR_AMD64_SNP_RESERVED_MASK	GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
+#define MSR_AMD64_SAVIC_CONTROL		0xc0010138
+#define MSR_AMD64_SAVIC_EN_BIT		0
+#define MSR_AMD64_SAVIC_EN		BIT_ULL(MSR_AMD64_SAVIC_EN_BIT)
+#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT	1
+#define MSR_AMD64_SAVIC_ALLOWEDNMI	BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT)
 #define MSR_AMD64_RMP_BASE		0xc0010132
 #define MSR_AMD64_RMP_END		0xc0010133
 #define MSR_AMD64_RMP_CFG		0xc0010136
@@ -735,6 +750,7 @@
 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS	0xc0000300
 #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL		0xc0000301
 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR	0xc0000302
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET	0xc0000303
 
 /* AMD Hardware Feedback Support MSRs */
 #define MSR_AMD_WORKLOAD_CLASS_CONFIG		0xc0000500
@@ -1225,6 +1241,8 @@
 /* - AMD: */
 #define MSR_IA32_MBA_BW_BASE		0xc0000200
 #define MSR_IA32_SMBA_BW_BASE		0xc0000280
+#define MSR_IA32_L3_QOS_ABMC_CFG	0xc00003fd
+#define MSR_IA32_L3_QOS_EXT_CFG		0xc00003ff
 #define MSR_IA32_EVT_CFG_BASE		0xc0000400
 
 /* AMD-V MSRs */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 0f15d683817d..d420c9c066d4 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -35,6 +35,11 @@
 #define MC_VECTOR 18
 #define XM_VECTOR 19
 #define VE_VECTOR 20
+#define CP_VECTOR 21
+
+#define HV_VECTOR 28
+#define VC_VECTOR 29
+#define SX_VECTOR 30
 
 /* Select x86 specific features in <linux/kvm.h> */
 #define __KVM_HAVE_PIT
@@ -411,6 +416,35 @@ struct kvm_xcrs {
 	__u64 padding[16];
 };
 
+#define KVM_X86_REG_TYPE_MSR		2
+#define KVM_X86_REG_TYPE_KVM		3
+
+#define KVM_X86_KVM_REG_SIZE(reg)						\
+({										\
+	reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0;			\
+})
+
+#define KVM_X86_REG_TYPE_SIZE(type, reg)					\
+({										\
+	__u64 type_size = (__u64)type << 32;					\
+										\
+	type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 :		\
+		     type == KVM_X86_REG_TYPE_KVM ? KVM_X86_KVM_REG_SIZE(reg) :	\
+		     0;								\
+	type_size;								\
+})
+
+#define KVM_X86_REG_ID(type, index)				\
+	(KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index)
+
+#define KVM_X86_REG_MSR(index)					\
+	KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index)
+#define KVM_X86_REG_KVM(index)					\
+	KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index)
+
+/* KVM-defined registers starting from 0 */
+#define KVM_REG_GUEST_SSP	0
+
 #define KVM_SYNC_X86_REGS      (1UL << 0)
 #define KVM_SYNC_X86_SREGS     (1UL << 1)
 #define KVM_SYNC_X86_EVENTS    (1UL << 2)
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index 9c640a521a67..650e3256ea7d 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -118,6 +118,10 @@
 #define SVM_VMGEXIT_AP_CREATE			1
 #define SVM_VMGEXIT_AP_DESTROY			2
 #define SVM_VMGEXIT_SNP_RUN_VMPL		0x80000018
+#define SVM_VMGEXIT_SAVIC			0x8000001a
+#define SVM_VMGEXIT_SAVIC_REGISTER_GPA		0
+#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA	1
+#define SVM_VMGEXIT_SAVIC_SELF_GPA		~0ULL
 #define SVM_VMGEXIT_HV_FEATURES			0x8000fffd
 #define SVM_VMGEXIT_TERM_REQUEST		0x8000fffe
 #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code)	\
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index f0f4a4cf84a7..1baa86dfe029 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -93,7 +93,10 @@
 #define EXIT_REASON_TPAUSE              68
 #define EXIT_REASON_BUS_LOCK            74
 #define EXIT_REASON_NOTIFY              75
+#define EXIT_REASON_SEAMCALL            76
 #define EXIT_REASON_TDCALL              77
+#define EXIT_REASON_MSR_READ_IMM        84
+#define EXIT_REASON_MSR_WRITE_IMM       85
 
 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -158,7 +161,9 @@
 	{ EXIT_REASON_TPAUSE,                "TPAUSE" }, \
 	{ EXIT_REASON_BUS_LOCK,              "BUS_LOCK" }, \
 	{ EXIT_REASON_NOTIFY,                "NOTIFY" }, \
-	{ EXIT_REASON_TDCALL,                "TDCALL" }
+	{ EXIT_REASON_TDCALL,                "TDCALL" }, \
+	{ EXIT_REASON_MSR_READ_IMM,          "MSR_READ_IMM" }, \
+	{ EXIT_REASON_MSR_WRITE_IMM,         "MSR_WRITE_IMM" }
 
 #define VMX_EXIT_REASON_FLAGS \
 	{ VMX_EXIT_REASONS_FAILED_VMENTRY,	"FAILED_VMENTRY" }
diff --git a/tools/arch/x86/tools/gen-cpu-feature-names-x86.awk b/tools/arch/x86/tools/gen-cpu-feature-names-x86.awk
new file mode 100644
index 000000000000..cc4c7a3e6c2e
--- /dev/null
+++ b/tools/arch/x86/tools/gen-cpu-feature-names-x86.awk
@@ -0,0 +1,34 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025, Oracle and/or its affiliates.
+#
+# Usage: awk -f gen-cpu-feature-names-x86.awk cpufeatures.h > cpu-feature-names.c
+# Writes a C table, cpu_feature_names[], that maps the bit index of every
+# X86_FEATURE_* and X86_BUG_* #define in the input to the macro's own name.
+BEGIN {
+	print "/* cpu feature name array generated from cpufeatures.h */"
+	print "/* Do not change this code. */"
+	print	# bare print writes $0; presumably empty in BEGIN, giving a blank line
+	print "static const char *cpu_feature_names[(NCAPINTS+NBUGINTS)*32] = {"
+	# ERE: a parenthesised run of digits, '*', '+' and spaces, e.g. "(12*32+11)".
+	value_expr = "\\([0-9*+ ]+\\)"
+}
+# Feature lines: the text between the parentheses is used as the array index.
+/^#define X86_FEATURE_/ {
+	if (match($0, value_expr)) {
+		value = substr($0, RSTART + 1, RLENGTH - 2)	# drop the enclosing "(" and ")"
+		print "\t[" value "] = \"" $2 "\","
+	}
+}
+# Bug lines: same extraction, but the index is offset by NCAPINTS*32.
+/^#define X86_BUG_/ {
+	if (match($0, value_expr)) {
+		value = substr($0, RSTART + 1, RLENGTH - 2)	# drop the enclosing "(" and ")"
+		print "\t[NCAPINTS*32+(" value ")] = \"" $2 "\","
+	}
+}
+# Close the array initializer opened in BEGIN.
+END {
+	print "};"
+}
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 062bbd6cd048..fd2585af1252 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -32,7 +32,7 @@ FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled
 FEATURE_DISPLAY = libbfd
 
 check_feat := 1
-NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean
+NON_CHECK_FEAT_TARGETS := clean bpftool_clean resolve_btfids_clean
 ifdef MAKECMDGOALS
 ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
   check_feat := 0
@@ -70,7 +70,7 @@ $(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
 
 PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
 
-all: $(PROGS) bpftool runqslower
+all: $(PROGS) bpftool
 
 $(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
 $(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
@@ -86,7 +86,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
 $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
 $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
 
-clean: bpftool_clean runqslower_clean resolve_btfids_clean
+clean: bpftool_clean resolve_btfids_clean
 	$(call QUIET_CLEAN, bpf-progs)
 	$(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
 	       $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
@@ -112,12 +112,6 @@ bpftool_install:
 bpftool_clean:
 	$(call descend,bpftool,clean)
 
-runqslower:
-	$(call descend,runqslower)
-
-runqslower_clean:
-	$(call descend,runqslower,clean)
-
 resolve_btfids:
 	$(call descend,resolve_btfids)
 
@@ -125,5 +119,4 @@ resolve_btfids_clean:
 	$(call descend,resolve_btfids,clean)
 
 .PHONY: all install clean bpftool bpftool_install bpftool_clean \
-	runqslower runqslower_clean \
 	resolve_btfids resolve_btfids_clean
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 252e4c538edb..1af3305ea2b2 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -55,7 +55,8 @@ MAP COMMANDS
 |     | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
 |     | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
 |     | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
-|     | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** }
+|     | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena**
+|     | **insn_array** }
 
 DESCRIPTION
 ===========
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 009633294b09..35aeeaf5f711 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -182,7 +182,7 @@ bpftool prog tracelog
 
 bpftool prog tracelog { stdout | stderr } *PROG*
     Dump the BPF stream of the program. BPF programs can write to these streams
-    at runtime with the **bpf_stream_vprintk**\ () kfunc. The kernel may write
+    at runtime with the **bpf_stream_vprintk_impl**\ () kfunc. The kernel may write
     error messages to the standard error stream. This facility should be used
     only for debugging purposes.
 
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index ff12628593ae..def297e879f4 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -590,7 +590,7 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
 	case BTF_KIND_DATASEC:
 		return btf_dumper_datasec(d, type_id, data);
 	default:
-		jsonw_printf(d->jw, "(unsupported-kind");
+		jsonw_printf(d->jw, "(unsupported-kind)");
 		return -EINVAL;
 	}
 }
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index c9de44a45778..7ebf7dbcfba4 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1477,7 +1477,8 @@ static int do_help(int argc, char **argv)
 		"                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
 		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
 		"                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
-		"                 task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena }\n"
+		"                 task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena |\n"
+		"                 insn_array }\n"
 		"       " HELP_SPEC_OPTIONS " |\n"
 		"                    {-f|--bpffs} | {-n|--nomount} }\n"
 		"",
diff --git a/tools/bpf/bpftool/sign.c b/tools/bpf/bpftool/sign.c
index b34f74d210e9..f9b742f4bb10 100644
--- a/tools/bpf/bpftool/sign.c
+++ b/tools/bpf/bpftool/sign.c
@@ -28,6 +28,12 @@
 
 #define OPEN_SSL_ERR_BUF_LEN 256
 
+/* OpenSSL < 3 lacks ERR_get_error_all(); use ERR_get_error_line_data(), deprecated in 3.0 */
+#if !defined(OPENSSL_VERSION_MAJOR) || (OPENSSL_VERSION_MAJOR < 3)
+#define ERR_get_error_all(file, line, func, data, flags) \
+	ERR_get_error_line_data(file, line, data, flags)
+#endif
+
 static void display_openssl_errors(int l)
 {
 	char buf[OPEN_SSL_ERR_BUF_LEN];
diff --git a/tools/bpf/runqslower/.gitignore b/tools/bpf/runqslower/.gitignore
deleted file mode 100644
index ffdb70230c8b..000000000000
--- a/tools/bpf/runqslower/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-/.output
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
deleted file mode 100644
index 78a436c4072e..000000000000
--- a/tools/bpf/runqslower/Makefile
+++ /dev/null
@@ -1,91 +0,0 @@
-# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-include ../../scripts/Makefile.include
-
-OUTPUT ?= $(abspath .output)/
-
-BPFTOOL_OUTPUT := $(OUTPUT)bpftool/
-DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool
-BPFTOOL ?= $(DEFAULT_BPFTOOL)
-BPF_TARGET_ENDIAN ?= --target=bpf
-LIBBPF_SRC := $(abspath ../../lib/bpf)
-BPFOBJ_OUTPUT := $(OUTPUT)libbpf/
-BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a
-BPF_DESTDIR := $(BPFOBJ_OUTPUT)
-BPF_INCLUDE := $(BPF_DESTDIR)/include
-INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi)
-CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS)
-CFLAGS += $(EXTRA_CFLAGS)
-LDFLAGS += $(EXTRA_LDFLAGS)
-LDLIBS += -lelf -lz
-
-# Try to detect best kernel BTF source
-KERNEL_REL := $(shell uname -r)
-VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)		\
-	$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
-	../../../vmlinux /sys/kernel/btf/vmlinux	\
-	/boot/vmlinux-$(KERNEL_REL)
-VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword			       \
-					  $(wildcard $(VMLINUX_BTF_PATHS))))
-
-ifneq ($(V),1)
-MAKEFLAGS += --no-print-directory
-submake_extras := feature_display=0
-endif
-
-.DELETE_ON_ERROR:
-
-.PHONY: all clean runqslower libbpf_hdrs
-all: runqslower
-
-runqslower: $(OUTPUT)/runqslower
-
-clean:
-	$(call QUIET_CLEAN, runqslower)
-	$(Q)$(RM) -r $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT)
-	$(Q)$(RM) $(OUTPUT)*.o $(OUTPUT)*.d
-	$(Q)$(RM) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
-	$(Q)$(RM) $(OUTPUT)runqslower
-	$(Q)$(RM) -r .output
-
-libbpf_hdrs: $(BPFOBJ)
-
-$(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
-
-$(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h	      \
-			$(OUTPUT)/runqslower.bpf.o | libbpf_hdrs
-
-$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h | libbpf_hdrs
-
-$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
-	$(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@
-
-$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
-	$(QUIET_GEN)$(CLANG) -g -O2 $(BPF_TARGET_ENDIAN) $(INCLUDES)	      \
-		 -c $(filter %.c,$^) -o $@ &&				      \
-	$(LLVM_STRIP) -g $@
-
-$(OUTPUT)/%.o: %.c | $(OUTPUT)
-	$(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
-
-$(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
-	$(QUIET_MKDIR)mkdir -p $@
-
-$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
-ifeq ($(VMLINUX_H),)
-	$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
-		echo "Couldn't find kernel BTF; set VMLINUX_BTF to"	       \
-			"specify its location." >&2;			       \
-		exit 1;\
-	fi
-	$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
-else
-	$(Q)cp "$(VMLINUX_H)" $@
-endif
-
-$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
-	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \
-		    DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers
-
-$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
-	$(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) bootstrap
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
deleted file mode 100644
index fced54a3adf6..000000000000
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ /dev/null
@@ -1,106 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2019 Facebook
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include "runqslower.h"
-
-#define TASK_RUNNING 0
-#define BPF_F_CURRENT_CPU 0xffffffffULL
-
-const volatile __u64 min_us = 0;
-const volatile pid_t targ_pid = 0;
-
-struct {
-	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
-	__uint(map_flags, BPF_F_NO_PREALLOC);
-	__type(key, int);
-	__type(value, u64);
-} start SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
-	__uint(key_size, sizeof(u32));
-	__uint(value_size, sizeof(u32));
-} events SEC(".maps");
-
-/* record enqueue timestamp */
-__always_inline
-static int trace_enqueue(struct task_struct *t)
-{
-	u32 pid = t->pid;
-	u64 *ptr;
-
-	if (!pid || (targ_pid && targ_pid != pid))
-		return 0;
-
-	ptr = bpf_task_storage_get(&start, t, 0,
-				   BPF_LOCAL_STORAGE_GET_F_CREATE);
-	if (!ptr)
-		return 0;
-
-	*ptr = bpf_ktime_get_ns();
-	return 0;
-}
-
-SEC("tp_btf/sched_wakeup")
-int handle__sched_wakeup(u64 *ctx)
-{
-	/* TP_PROTO(struct task_struct *p) */
-	struct task_struct *p = (void *)ctx[0];
-
-	return trace_enqueue(p);
-}
-
-SEC("tp_btf/sched_wakeup_new")
-int handle__sched_wakeup_new(u64 *ctx)
-{
-	/* TP_PROTO(struct task_struct *p) */
-	struct task_struct *p = (void *)ctx[0];
-
-	return trace_enqueue(p);
-}
-
-SEC("tp_btf/sched_switch")
-int handle__sched_switch(u64 *ctx)
-{
-	/* TP_PROTO(bool preempt, struct task_struct *prev,
-	 *	    struct task_struct *next)
-	 */
-	struct task_struct *prev = (struct task_struct *)ctx[1];
-	struct task_struct *next = (struct task_struct *)ctx[2];
-	struct runq_event event = {};
-	u64 *tsp, delta_us;
-	u32 pid;
-
-	/* ivcsw: treat like an enqueue event and store timestamp */
-	if (prev->__state == TASK_RUNNING)
-		trace_enqueue(prev);
-
-	pid = next->pid;
-
-	/* For pid mismatch, save a bpf_task_storage_get */
-	if (!pid || (targ_pid && targ_pid != pid))
-		return 0;
-
-	/* fetch timestamp and calculate delta */
-	tsp = bpf_task_storage_get(&start, next, 0, 0);
-	if (!tsp)
-		return 0;   /* missed enqueue */
-
-	delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
-	if (min_us && delta_us <= min_us)
-		return 0;
-
-	event.pid = pid;
-	event.delta_us = delta_us;
-	bpf_get_current_comm(&event.task, sizeof(event.task));
-
-	/* output */
-	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
-			      &event, sizeof(event));
-
-	bpf_task_storage_delete(&start, next);
-	return 0;
-}
-
-char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c
deleted file mode 100644
index 83c5993a139a..000000000000
--- a/tools/bpf/runqslower/runqslower.c
+++ /dev/null
@@ -1,171 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-// Copyright (c) 2019 Facebook
-#include <argp.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <bpf/libbpf.h>
-#include <bpf/bpf.h>
-#include "runqslower.h"
-#include "runqslower.skel.h"
-
-struct env {
-	pid_t pid;
-	__u64 min_us;
-	bool verbose;
-} env = {
-	.min_us = 10000,
-};
-
-const char *argp_program_version = "runqslower 0.1";
-const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
-const char argp_program_doc[] =
-"runqslower    Trace long process scheduling delays.\n"
-"              For Linux, uses eBPF, BPF CO-RE, libbpf, BTF.\n"
-"\n"
-"This script traces high scheduling delays between tasks being\n"
-"ready to run and them running on CPU after that.\n"
-"\n"
-"USAGE: runqslower [-p PID] [min_us]\n"
-"\n"
-"EXAMPLES:\n"
-"    runqslower         # trace run queue latency higher than 10000 us (default)\n"
-"    runqslower 1000    # trace run queue latency higher than 1000 us\n"
-"    runqslower -p 123  # trace pid 123 only\n";
-
-static const struct argp_option opts[] = {
-	{ "pid", 'p', "PID", 0, "Process PID to trace"},
-	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
-	{},
-};
-
-static error_t parse_arg(int key, char *arg, struct argp_state *state)
-{
-	static int pos_args;
-	int pid;
-	long long min_us;
-
-	switch (key) {
-	case 'v':
-		env.verbose = true;
-		break;
-	case 'p':
-		errno = 0;
-		pid = strtol(arg, NULL, 10);
-		if (errno || pid <= 0) {
-			fprintf(stderr, "Invalid PID: %s\n", arg);
-			argp_usage(state);
-		}
-		env.pid = pid;
-		break;
-	case ARGP_KEY_ARG:
-		if (pos_args++) {
-			fprintf(stderr,
-				"Unrecognized positional argument: %s\n", arg);
-			argp_usage(state);
-		}
-		errno = 0;
-		min_us = strtoll(arg, NULL, 10);
-		if (errno || min_us <= 0) {
-			fprintf(stderr, "Invalid delay (in us): %s\n", arg);
-			argp_usage(state);
-		}
-		env.min_us = min_us;
-		break;
-	default:
-		return ARGP_ERR_UNKNOWN;
-	}
-	return 0;
-}
-
-int libbpf_print_fn(enum libbpf_print_level level,
-		    const char *format, va_list args)
-{
-	if (level == LIBBPF_DEBUG && !env.verbose)
-		return 0;
-	return vfprintf(stderr, format, args);
-}
-
-void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
-{
-	const struct runq_event *e = data;
-	struct tm *tm;
-	char ts[32];
-	time_t t;
-
-	time(&t);
-	tm = localtime(&t);
-	strftime(ts, sizeof(ts), "%H:%M:%S", tm);
-	printf("%-8s %-16s %-6d %14llu\n", ts, e->task, e->pid, e->delta_us);
-}
-
-void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
-{
-	printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu);
-}
-
-int main(int argc, char **argv)
-{
-	static const struct argp argp = {
-		.options = opts,
-		.parser = parse_arg,
-		.doc = argp_program_doc,
-	};
-	struct perf_buffer *pb = NULL;
-	struct runqslower_bpf *obj;
-	int err;
-
-	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
-	if (err)
-		return err;
-
-	libbpf_set_print(libbpf_print_fn);
-
-	/* Use libbpf 1.0 API mode */
-	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
-	obj = runqslower_bpf__open();
-	if (!obj) {
-		fprintf(stderr, "failed to open and/or load BPF object\n");
-		return 1;
-	}
-
-	/* initialize global data (filtering options) */
-	obj->rodata->targ_pid = env.pid;
-	obj->rodata->min_us = env.min_us;
-
-	err = runqslower_bpf__load(obj);
-	if (err) {
-		fprintf(stderr, "failed to load BPF object: %d\n", err);
-		goto cleanup;
-	}
-
-	err = runqslower_bpf__attach(obj);
-	if (err) {
-		fprintf(stderr, "failed to attach BPF programs\n");
-		goto cleanup;
-	}
-
-	printf("Tracing run queue latency higher than %llu us\n", env.min_us);
-	printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)");
-
-	pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64,
-			      handle_event, handle_lost_events, NULL, NULL);
-	err = libbpf_get_error(pb);
-	if (err) {
-		pb = NULL;
-		fprintf(stderr, "failed to open perf buffer: %d\n", err);
-		goto cleanup;
-	}
-
-	while ((err = perf_buffer__poll(pb, 100)) >= 0)
-		;
-	printf("Error polling perf buffer: %d\n", err);
-
-cleanup:
-	perf_buffer__free(pb);
-	runqslower_bpf__destroy(obj);
-
-	return err != 0;
-}
diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h
deleted file mode 100644
index 4f70f07200c2..000000000000
--- a/tools/bpf/runqslower/runqslower.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __RUNQSLOWER_H
-#define __RUNQSLOWER_H
-
-#define TASK_COMM_LEN 16
-
-struct runq_event {
-	char task[TASK_COMM_LEN];
-	__u64 delta_us;
-	pid_t pid;
-};
-
-#endif /* __RUNQSLOWER_H */
diff --git a/tools/build/Build b/tools/build/Build
new file mode 100644
index 000000000000..1c7e598e9f59
--- /dev/null
+++ b/tools/build/Build
@@ -0,0 +1,2 @@
+hostprogs	:= fixdep
+fixdep-y	:= fixdep.o
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 63ef21878761..3a5a3808ab2a 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -37,5 +37,22 @@ ifneq ($(wildcard $(TMP_O)),)
 	$(Q)$(MAKE) -C feature OUTPUT=$(TMP_O) clean >/dev/null
 endif
 
-$(OUTPUT)fixdep: $(srctree)/tools/build/fixdep.c
-	$(QUIET_CC)$(HOSTCC) $(KBUILD_HOSTCFLAGS) $(KBUILD_HOSTLDFLAGS) -o $@ $<
+FIXDEP		:= $(OUTPUT)fixdep
+FIXDEP_IN	:= $(OUTPUT)fixdep-in.o
+
+# To track fixdep's dependencies properly, fixdep needs to run on itself.
+# Build it twice the first time.
+$(FIXDEP_IN): FORCE
+	$(Q)if [ ! -f $(FIXDEP) ]; then						\
+		$(MAKE) $(build)=fixdep HOSTCFLAGS="$(KBUILD_HOSTCFLAGS)";	\
+		rm -f $(FIXDEP).o;						\
+	fi
+	$(Q)$(MAKE) $(build)=fixdep HOSTCFLAGS="$(KBUILD_HOSTCFLAGS)"
+
+
+$(FIXDEP): $(FIXDEP_IN)
+	$(QUIET_LINK)$(HOSTCC) $(FIXDEP_IN) $(KBUILD_HOSTLDFLAGS) -o $@
+
+FORCE:
+
+.PHONY: FORCE
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 32bbe29fe5f6..300a329bc581 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -315,5 +315,7 @@ endef
 
 ifeq ($(FEATURE_DISPLAY_DEFERRED),)
   $(call feature_display_entries)
-  $(info )
+  ifeq ($(feature_display),1)
+    $(info )
+  endif
 endif
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 49b0add392b1..95646290cb89 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -107,7 +107,7 @@ all: $(FILES)
 __BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
   BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
   BUILD_BFD = $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
-  BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd
+  BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -ldl -lz -llzma -lzstd
 
 __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
   BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
@@ -115,7 +115,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(
 ###############################
 
 $(OUTPUT)test-all.bin:
-	$(BUILD_ALL) || $(BUILD_ALL) -lopcodes -liberty
+	$(BUILD_ALL)
 
 $(OUTPUT)test-hello.bin:
 	$(BUILD)
diff --git a/tools/docs/check-variable-fonts.py b/tools/docs/check-variable-fonts.py
new file mode 100755
index 000000000000..958d5a745724
--- /dev/null
+++ b/tools/docs/check-variable-fonts.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) Akira Yokosawa, 2024
+#
+# Ported to Python by (c) Mauro Carvalho Chehab, 2025
+#
+# pylint: disable=C0103
+
+"""
+Detect problematic Noto CJK variable fonts.
+
+For more details, see .../tools/lib/python/kdoc/latex_fonts.py.
+"""
+
+import argparse
+import sys
+import os.path
+
+src_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(src_dir, '../lib/python'))
+
+from kdoc.latex_fonts import LatexFontChecker
+
+checker = LatexFontChecker()
+
+parser=argparse.ArgumentParser(description=checker.description(),
+                               formatter_class=argparse.RawTextHelpFormatter)
+parser.add_argument("--deny-vf",
+                    help="XDG_CONFIG_HOME dir containing fontconfig/fonts.conf file")
+
+args=parser.parse_args()
+
+msg = LatexFontChecker(args.deny_vf).check()
+if msg:
+    print(msg)
+
+sys.exit(1)
diff --git a/tools/docs/checktransupdate.py b/tools/docs/checktransupdate.py
new file mode 100755
index 000000000000..e894652369a5
--- /dev/null
+++ b/tools/docs/checktransupdate.py
@@ -0,0 +1,307 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This script helps track the translation status of the documentation
+in different locales, e.g., zh_CN. More specially, it uses `git log`
+commit to find the latest english commit from the translation commit
+(order by author date) and the latest english commits from HEAD. If
+differences occur, report the file and commits that need to be updated.
+
+The usage is as follows:
+- tools/docs/checktransupdate.py -l zh_CN
+This will print all the files that need to be updated or translated in the zh_CN locale.
+- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
+This will only print the status of the specified file.
+
+The output is something like:
+Documentation/dev-tools/kfence.rst
+No translation in the locale of zh_CN
+
+Documentation/translations/zh_CN/dev-tools/testing-overview.rst
+commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
+1 commits needs resolving in total
+"""
+
+import os
+import re
+import time
+import logging
+from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
+from datetime import datetime
+
+
+def get_origin_path(file_path):
+    """Get the origin path from the translation path"""
+    paths = file_path.split("/")
+    tidx = paths.index("translations")
+    opaths = paths[:tidx]
+    opaths += paths[tidx + 2 :]
+    return "/".join(opaths)
+
+
+def get_latest_commit_from(file_path, commit):
+    """Get the latest commit from the specified commit for the specified file"""
+    command = f"git log --pretty=format:%H%n%aD%n%cD%n%n%B {commit} -1 -- {file_path}"
+    logging.debug(command)
+    pipe = os.popen(command)
+    result = pipe.read()
+    result = result.split("\n")
+    if len(result) <= 1:
+        return None
+
+    logging.debug("Result: %s", result[0])
+
+    return {
+        "hash": result[0],
+        "author_date": datetime.strptime(result[1], "%a, %d %b %Y %H:%M:%S %z"),
+        "commit_date": datetime.strptime(result[2], "%a, %d %b %Y %H:%M:%S %z"),
+        "message": result[4:],
+    }
+
+
+def get_origin_from_trans(origin_path, t_from_head):
+    """Get the latest origin commit from the translation commit"""
+    o_from_t = get_latest_commit_from(origin_path, t_from_head["hash"])
+    while o_from_t is not None and o_from_t["author_date"] > t_from_head["author_date"]:
+        o_from_t = get_latest_commit_from(origin_path, o_from_t["hash"] + "^")
+    if o_from_t is not None:
+        logging.debug("tracked origin commit id: %s", o_from_t["hash"])
+    return o_from_t
+
+
+def get_origin_from_trans_smartly(origin_path, t_from_head):
+    """Get the latest origin commit from the formatted translation commit:
+    (1) update to commit HASH (TITLE)
+    (2) Update the translation through commit HASH (TITLE)
+    """
+    # capture group for a 12-character abbreviated commit hash (lowercase hex digits)
+    HASH = r'([0-9a-f]{12})'
+    # pattern 1: contains "update to commit HASH"
+    pat_update_to = re.compile(rf'update to commit {HASH}')
+    # pattern 2: contains "Update the translation through commit HASH"
+    pat_update_translation = re.compile(rf'Update the translation through commit {HASH}')
+
+    origin_commit_hash = None
+    for line in t_from_head["message"]:
+        # check if the line matches the first pattern
+        match = pat_update_to.search(line)
+        if match:
+            origin_commit_hash = match.group(1)
+            break
+        # check if the line matches the second pattern
+        match = pat_update_translation.search(line)
+        if match:
+            origin_commit_hash = match.group(1)
+            break
+    if origin_commit_hash is None:
+        return None
+    o_from_t = get_latest_commit_from(origin_path, origin_commit_hash)
+    if o_from_t is not None:
+        logging.debug("tracked origin commit id: %s", o_from_t["hash"])
+    return o_from_t
+
+
+def get_commits_count_between(opath, commit1, commit2):
+    """Get the commits count between two commits for the specified file"""
+    command = f"git log --pretty=format:%H {commit1}...{commit2} -- {opath}"
+    logging.debug(command)
+    pipe = os.popen(command)
+    result = pipe.read().split("\n")
+    # filter out empty lines
+    result = list(filter(lambda x: x != "", result))
+    return result
+
+
+def pretty_output(commit):
+    """Pretty print the commit message"""
+    command = f"git log --pretty='format:%h (\"%s\")' -1 {commit}"
+    logging.debug(command)
+    pipe = os.popen(command)
+    return pipe.read()
+
+
+def valid_commit(commit):
+    """Check if the commit is valid or not"""
+    msg = pretty_output(commit)
+    return "Merge tag" not in msg
+
+def check_per_file(file_path):
+    """Log the origin commits that the translation at file_path has not caught up with"""
+    opath = get_origin_path(file_path)
+
+    if not os.path.isfile(opath):
+        logging.error("Cannot find the origin path for %s", file_path)
+        return
+
+    o_from_head = get_latest_commit_from(opath, "HEAD")
+    t_from_head = get_latest_commit_from(file_path, "HEAD")
+
+    if o_from_head is None or t_from_head is None:
+        logging.error("Cannot find the latest commit for %s", file_path)
+        return
+
+    o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
+    # prefer the hash named in the commit message; fall back to the author-date search
+    if o_from_t is None:
+        o_from_t = get_origin_from_trans(opath, t_from_head)
+
+    if o_from_t is None:
+        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
+        return
+
+    if o_from_head["hash"] == o_from_t["hash"]:
+        logging.debug("No update needed for %s", file_path)
+    else:
+        logging.info(file_path)
+        commits = get_commits_count_between(
+            opath, o_from_t["hash"], o_from_head["hash"]
+        )
+        count = 0
+        for commit in commits:
+            if valid_commit(commit):
+                logging.info("commit %s", pretty_output(commit))
+                count += 1
+        logging.info("%d commits needs resolving in total\n", count)
+
+
+def valid_locales(locale):
+    """Return locale if Documentation/translations/<locale> exists, else raise ArgumentTypeError"""
+    script_path = os.path.dirname(os.path.abspath(__file__))
+    linux_path = os.path.join(script_path, "../..")
+    if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
+        raise ArgumentTypeError(f"Invalid locale: {locale}")
+    return locale
+
+
+def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
+    """List all files with the specified suffix in the folder and its subfolders"""
+    files = []
+    stack = [folder]
+
+    while stack:
+        pwd = stack.pop()
+        # filter out the exclude folders
+        if os.path.basename(pwd) in exclude_folders:
+            continue
+        # list all files and folders
+        for item in os.listdir(pwd):
+            ab_item = os.path.join(pwd, item)
+            if os.path.isdir(ab_item):
+                stack.append(ab_item)
+            else:
+                if ab_item.endswith(include_suffix):
+                    files.append(ab_item)
+
+    return files
+
+
+class DmesgFormatter(logging.Formatter):
+    """Custom dmesg logging formatter"""
+    def format(self, record):
+        timestamp = time.time()
+        formatted_time = f"[{timestamp:>10.6f}]"
+        log_message = f"{formatted_time} {record.getMessage()}"
+        return log_message
+
+
+def config_logging(log_level, log_file="checktransupdate.log"):
+    """configure logging based on the log level"""
+    # set up the root logger
+    logger = logging.getLogger()
+    logger.setLevel(log_level)
+
+    # Create console handler
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(log_level)
+
+    # Create file handler
+    file_handler = logging.FileHandler(log_file)
+    file_handler.setLevel(log_level)
+
+    # Create formatter and add it to the handlers
+    formatter = DmesgFormatter()
+    console_handler.setFormatter(formatter)
+    file_handler.setFormatter(formatter)
+
+    # Add the handler to the logger
+    logger.addHandler(console_handler)
+    logger.addHandler(file_handler)
+
+
+def main():
+    """Parse the command line, set up logging and check each translation file"""
+    script_path = os.path.dirname(os.path.abspath(__file__))
+    linux_path = os.path.join(script_path, "../..")
+
+    parser = ArgumentParser(description="Check the translation update")
+    parser.add_argument(
+        "-l",
+        "--locale",
+        default="zh_CN",
+        type=valid_locales,
+        help="Locale to check when files are not specified",
+    )
+
+    parser.add_argument(
+        "--print-missing-translations",
+        action=BooleanOptionalAction,
+        default=True,
+        help="Print files that do not have translations",
+    )
+
+    parser.add_argument(
+        '--log',
+        default='INFO',
+        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
+        help='Set the logging level')
+
+    parser.add_argument(
+        '--logfile',
+        default='checktransupdate.log',
+        help='Set the logging file (default: checktransupdate.log)')
+
+    parser.add_argument(
+        "files", nargs="*", help="Files to check, if not specified, check all files"
+    )
+    args = parser.parse_args()
+
+    # Configure logging based on the --log and --logfile arguments
+    log_level = getattr(logging, args.log.upper(), logging.INFO)
+    config_logging(log_level, args.logfile)
+
+    # Get files relative to the linux path
+    files = args.files
+    if len(files) == 0:
+        official_files = list_files_with_excluding_folders(
+            os.path.join(linux_path, "Documentation"), ["translations", "output"], "rst"
+        )
+
+        for file in official_files:
+            # split the path into parts
+            path_parts = file.split(os.sep)
+            # find the index of the "Documentation" directory
+            kindex = path_parts.index("Documentation")
+            # insert the translations and locale after the Documentation directory
+            new_path_parts = path_parts[:kindex + 1] + ["translations", args.locale] \
+                           + path_parts[kindex + 1 :]
+            # join the path parts back together
+            new_file = os.sep.join(new_path_parts)
+            if os.path.isfile(new_file):
+                files.append(new_file)
+            else:
+                if args.print_missing_translations:
+                    logging.info(os.path.relpath(os.path.abspath(file), linux_path))
+                    logging.info("No translation in the locale of %s\n", args.locale)
+
+    files = list(map(lambda x: os.path.relpath(os.path.abspath(x), linux_path), files))
+
+    # cd to linux root directory
+    os.chdir(linux_path)
+
+    for file in files:
+        check_per_file(file)
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/docs/documentation-file-ref-check b/tools/docs/documentation-file-ref-check
new file mode 100755
index 000000000000..0cad42f6943b
--- /dev/null
+++ b/tools/docs/documentation-file-ref-check
@@ -0,0 +1,245 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+#
+# Treewide grep for references to files under Documentation, and report
+# non-existing files in stderr.
+
+use warnings;
+use strict;
+use Getopt::Long qw(:config no_auto_abbrev);
+
+# NOTE: only add things here when the file was gone, but the text wants
+# to mention a past documentation file, for example, to give credits for
+# the original work.
+my %false_positives = (
+	"Documentation/scsi/scsi_mid_low_api.rst" => "Documentation/Configure.help",
+	"drivers/vhost/vhost.c" => "Documentation/virtual/lguest/lguest.c",
+);
+
+my $scriptname = $0;
+$scriptname =~ s,^.*?(tools/docs/[^/]+)$,$1,;	# drop any leading path so it matches the git grep file name
+
+# Parse arguments
+my $help = 0;
+my $fix = 0;
+my $warn = 0;
+
+if (! -e ".git") {
+	printf "Warning: can't check if file exists, as this is not a git tree\n";
+	exit 0;
+}
+
+GetOptions(
+	'fix' => \$fix,
+	'warn' => \$warn,
+	'h|help|usage' => \$help,
+);
+
+if ($help != 0) {
+    print "$scriptname [--help] [--fix] [--warn]\n";
+    exit -1;
+}
+
+# Step 1: find broken references
+print "Finding broken references. This may take a while...  " if ($fix);
+
+my %broken_ref;
+
+my $doc_fix = 0;
+
+open IN, "git grep ':doc:\`' Documentation/|"
+     or die "Failed to run git grep";
+while (<IN>) {
+	next if (!m,^([^:]+):.*\:doc\:\`([^\`]+)\`,);
+	next if (m,sphinx/,);
+
+	my $file = $1;
+	my $d = $1;
+	my $doc_ref = $2;
+
+	my $f = $doc_ref;
+
+	$d =~ s,(.*/).*,$1,;
+	$f =~ s,.*\<([^\>]+)\>,$1,;
+
+	if ($f =~ m,^/,) {
+		$f = "$f.rst";
+		$f =~ s,^/,Documentation/,;
+	} else {
+		$f = "$d$f.rst";
+	}
+
+	next if (grep -e, glob("$f"));
+
+	if ($fix && !$doc_fix) {
+		print STDERR "\nWARNING: Currently, can't fix broken :doc:`` fields\n";
+	}
+	$doc_fix++;
+
+	print STDERR "$file: :doc:`$doc_ref`\n";
+}
+close IN;
+
+open IN, "git grep 'Documentation/'|"
+     or die "Failed to run git grep";
+while (<IN>) {
+	next if (!m/^([^:]+):(.*)/);
+
+	my $f = $1;
+	my $ln = $2;
+
+	# On linux-next, discard the Next/ directory
+	next if ($f =~ m,^Next/,);
+
+	# Makefiles and scripts contain nasty expressions to parse docs
+	next if ($f =~ m/Makefile/ || $f =~ m/\.(sh|py|pl|~|rej|org|orig)$/);
+
+	# It doesn't make sense to parse hidden files
+	next if ($f =~ m#/\.#);
+
+	# Skip this script
+	next if ($f eq $scriptname);
+
+	# Ignore the dir where documentation will be built
+	next if ($ln =~ m,\b(\S*)Documentation/output,);
+
+	if ($ln =~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*\[\]\?+-]*)(.*),) {
+		my $prefix = $1;
+		my $ref = $2;
+		my $base = $2;
+		my $extra = $3;
+
+		# some file references are like:
+		# /usr/src/linux/Documentation/DMA-{API,mapping}.txt
+		# For now, ignore them
+		next if ($extra =~ m/^{/);
+
+		# Remove footnotes at the end like:
+		# Documentation/devicetree/dt-object-internal.txt[1]
+		$ref =~ s/(txt|rst)\[\d+]$/$1/;
+
+		# Remove ending ']' without any '['
+		$ref =~ s/\].*// if (!($ref =~ m/\[/));
+
+		# Remove puntuation marks at the end
+		$ref =~ s/[\,\.]+$//;
+
+		my $fulref = "$prefix$ref";
+
+		$fulref =~ s/^(\<file|ref)://;
+		$fulref =~ s/^[\'\`]+//;
+		$fulref =~ s,^\$\(.*\)/,,;
+		$base =~ s,.*/,,;
+
+		# Remove URL false-positives
+		next if ($fulref =~ m/^http/);
+
+		# Remove sched-pelt false-positive
+		next if ($fulref =~ m,^Documentation/scheduler/sched-pelt$,);
+
+		# Discard some build examples from Documentation/target/tcm_mod_builder.rst
+		next if ($fulref =~ m,mnt/sdb/lio-core-2.6.git/Documentation/target,);
+
+		# Check if exists, evaluating wildcards
+		next if (grep -e, glob("$ref $fulref"));
+
+		# Accept relative Documentation patches for tools/
+		if ($f =~ m/tools/) {
+			my $path = $f;
+			$path =~ s,(.*)/.*,$1,;
+			$path =~ s,testing/selftests/bpf,bpf/bpftool,;
+			next if (grep -e, glob("$path/$ref $path/../$ref $path/$fulref"));
+		}
+
+		# Discard known false-positives
+		if (defined($false_positives{$f})) {
+			next if ($false_positives{$f} eq $fulref);
+		}
+
+		if ($fix) {
+			if (!($ref =~ m/(scripts|Kconfig|Kbuild)/)) {
+				$broken_ref{$ref}++;
+			}
+		} elsif ($warn) {
+			print STDERR "Warning: $f references a file that doesn't exist: $fulref\n";
+		} else {
+			print STDERR "$f: $fulref\n";
+		}
+	}
+}
+close IN;
+
+exit 0 if (!$fix);
+
+# Step 2: Seek for file name alternatives
+print "Auto-fixing broken references. Please double-check the results\n";
+
+foreach my $ref (keys %broken_ref) {
+	my $new =$ref;
+
+	my $basedir = ".";
+	# On translations, only seek inside the translations directory
+	$basedir  = $1 if ($ref =~ m,(Documentation/translations/[^/]+),);
+
+	# get just the basename
+	$new =~ s,.*/,,;
+
+	my $f="";
+
+	# usual reason for breakage: DT file moved around
+	if ($ref =~ /devicetree/) {
+		# usual reason for breakage: DT file renamed to .yaml
+		if (!$f) {
+			my $new_ref = $ref;
+			$new_ref =~ s/\.txt$/.yaml/;
+			$f=$new_ref if (-f $new_ref);
+		}
+
+		if (!$f) {
+			my $search = $new;
+			$search =~ s,^.*/,,;
+			$f = qx(find Documentation/devicetree/ -iname "*$search*") if ($search);
+			if (!$f) {
+				# Manufacturer name may have changed
+				$search =~ s/^.*,//;
+				$f = qx(find Documentation/devicetree/ -iname "*$search*") if ($search);
+			}
+		}
+	}
+
+	# usual reason for breakage: file renamed to .rst
+	if (!$f) {
+		$new =~ s/\.txt$/.rst/;
+		$f=qx(find $basedir -iname $new) if ($new);
+	}
+
+	# usual reason for breakage: use dash or underline
+	if (!$f) {
+		$new =~ s/[-_]/[-_]/g;
+		$f=qx(find $basedir -iname $new) if ($new);
+	}
+
+	# Wild guess: seek for the same name on another place
+	if (!$f) {
+		$f = qx(find $basedir -iname $new) if ($new);
+	}
+
+	my @find = split /\s+/, $f;
+
+	if (!$f) {
+		print STDERR "ERROR: Didn't find a replacement for $ref\n";
+	} elsif (scalar(@find) > 1) {
+		print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n";
+		foreach my $j (@find) {
+			$j =~ s,^./,,;
+			print STDERR "    $j\n";
+		}
+	} else {
+		$f = $find[0];
+		$f =~ s,^./,,;
+		print "INFO: Replacing $ref to $f\n";
+		foreach my $j (qx(git grep -l $ref)) {
+			qx(sed "s\@$ref\@$f\@g" -i $j);
+		}
+	}
+}
diff --git a/tools/docs/features-refresh.sh b/tools/docs/features-refresh.sh
new file mode 100755
index 000000000000..c2288124e94a
--- /dev/null
+++ b/tools/docs/features-refresh.sh
@@ -0,0 +1,98 @@
+#
+# Small script that refreshes the kernel feature support status in place.
+#
+
+for F_FILE in Documentation/features/*/*/arch-support.txt; do
+	F=$(grep "^#         Kconfig:" "$F_FILE" | cut -c26-)
+
+	#
+	# Each feature F is identified by a pair (O, K), where 'O' can
+	# be either the empty string (for 'nop') or "not" (the logical
+	# negation operator '!'); other operators are not supported.
+	#
+	O=""
+	K=$F
+	if [[ "$F" == !* ]]; then
+		O="not"
+		K=$(echo $F | sed -e 's/^!//g')
+	fi
+
+	#
+	# F := (O, K) is 'valid' iff there is a Kconfig file (for some
+	# arch) which contains K.
+	#
+	# Notice that this definition entails an 'asymmetry' between
+	# the case 'O = ""' and the case 'O = "not"'. E.g., F may be
+	# _invalid_ if:
+	#
+	# [case 'O = ""']
+	#   1) no arch provides support for F,
+	#   2) K does not exist (e.g., it was renamed/mis-typed);
+	#
+	# [case 'O = "not"']
+	#   3) all archs provide support for F,
+	#   4) as in (2).
+	#
+	# The rationale for adopting this definition (and, thus, for
+	# keeping the asymmetry) is:
+	#
+	#       We want to be able to 'detect' (2) (or (4)).
+	#
+	# (1) and (3) may further warn the developers about the fact
+	# that K can be removed.
+	#
+	F_VALID="false"
+	for ARCH_DIR in arch/*/; do
+		K_FILES=$(find $ARCH_DIR -name "Kconfig*")
+		K_GREP=$(grep "$K" $K_FILES)
+		if [ ! -z "$K_GREP" ]; then
+			F_VALID="true"
+			break
+		fi
+	done
+	if [ "$F_VALID" = "false" ]; then
+		printf "WARNING: '%s' is not a valid Kconfig\n" "$F"
+	fi
+
+	T_FILE="$F_FILE.tmp"
+	grep "^#" $F_FILE > $T_FILE
+	echo "    -----------------------" >> $T_FILE
+	echo "    |         arch |status|" >> $T_FILE
+	echo "    -----------------------" >> $T_FILE
+	for ARCH_DIR in arch/*/; do
+		ARCH=$(echo $ARCH_DIR | sed -e 's/^arch//g' | sed -e 's/\///g')
+		K_FILES=$(find $ARCH_DIR -name "Kconfig*")
+		K_GREP=$(grep "$K" $K_FILES)
+		#
+		# Arch support status values for (O, K) are updated according
+		# to the following rules.
+		#
+		#   - ("", K) is 'supported by a given arch', if there is a
+		#     Kconfig file for that arch which contains K;
+		#
+		#   - ("not", K) is 'supported by a given arch', if there is
+		#     no Kconfig file for that arch which contains K;
+		#
+		#   - otherwise: preserve the previous status value (if any),
+		#                default to 'not yet supported'.
+		#
+		# Notice that, according these rules, invalid features may be
+		# updated/modified.
+		#
+		if [ "$O" = "" ] && [ ! -z "$K_GREP" ]; then
+			printf "    |%12s: |  ok  |\n" "$ARCH" >> $T_FILE
+		elif [ "$O" = "not" ] && [ -z "$K_GREP" ]; then
+			printf "    |%12s: |  ok  |\n" "$ARCH" >> $T_FILE
+		else
+			S=$(grep -v "^#" "$F_FILE" | grep " $ARCH:")
+			if [ ! -z "$S" ]; then
+				echo "$S" >> $T_FILE
+			else
+				printf "    |%12s: | TODO |\n" "$ARCH" \
+					>> $T_FILE
+			fi
+		fi
+	done
+	echo "    -----------------------" >> $T_FILE
+	mv $T_FILE $F_FILE
+done
diff --git a/tools/docs/find-unused-docs.sh b/tools/docs/find-unused-docs.sh
new file mode 100755
index 000000000000..05552dbda5bc
--- /dev/null
+++ b/tools/docs/find-unused-docs.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# (c) 2017, Jonathan Corbet <corbet@lwn.net>
+#           sayli karnik <karniksayli1995@gmail.com>
+#
+# This script detects files with kernel-doc comments for exported functions
+# that are not included in documentation.
+#
+# usage: Run 'tools/docs/find-unused-docs.sh directory' from top level of kernel
+# 	 tree.
+#
+# example: $tools/docs/find-unused-docs.sh drivers/scsi
+#
+# Licensed under the terms of the GNU GPL License
+
+if ! [ -d "Documentation" ]; then
+	echo "Run from top level of kernel tree"
+	exit 1
+fi
+
+if [ "$#" -ne 1 ]; then
+	echo "Usage: tools/docs/find-unused-docs.sh directory"
+	exit 1
+fi
+
+if ! [ -d "$1" ]; then
+	echo "Directory $1 doesn't exist"
+	exit 1
+fi
+
+# This script lives in tools/docs/, so the top of the kernel tree is two levels up.
+cd "$( dirname "${BASH_SOURCE[0]}" )"
+cd ../..
+cd Documentation/
+
+echo "The following files contain kerneldoc comments for exported functions \
+that are not used in the formatted documentation"
+
+# FILES INCLUDED
+
+files_included=($(grep -rHR ".. kernel-doc" --include \*.rst | cut -d " " -f 3))
+
+declare -A FILES_INCLUDED
+
+for each in "${files_included[@]}"; do
+	FILES_INCLUDED[$each]="$each"
+	done
+
+cd ..
+
+# FILES NOT INCLUDED
+
+for file in `find $1 -name '*.c'`; do
+
+	if [[ ${FILES_INCLUDED[$file]+_} ]]; then
+	continue;
+	fi
+	str=$(PYTHONDONTWRITEBYTECODE=1 scripts/kernel-doc -export "$file" 2>/dev/null)
+	if [[ -n "$str" ]]; then
+	echo "$file"
+	fi
+	done
+
diff --git a/tools/docs/get_abi.py b/tools/docs/get_abi.py
new file mode 100755
index 000000000000..2f0b99401f26
--- /dev/null
+++ b/tools/docs/get_abi.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+# pylint: disable=R0903
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+import argparse
+import logging
+import os
+import sys
+
+# Import Python modules
+
+LIB_DIR = "../lib/python"
+SRC_DIR = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
+
+from abi.abi_parser import AbiParser                # pylint: disable=C0413
+from abi.abi_regex import AbiRegex                  # pylint: disable=C0413
+from abi.helpers import ABI_DIR, DEBUG_HELP         # pylint: disable=C0413
+from abi.system_symbols import SystemSymbols        # pylint: disable=C0413
+
+# Command line classes
+
+
+REST_DESC = """
+Produce output in ReST format.
+
+The output is done on two sections:
+
+- Symbols: show all parsed symbols in alphabetic order;
+- Files: cross reference the content of each file with the symbols on it.
+"""
+
+class AbiRest:
+    """Initialize an argparse subparser for rest output"""
+
+    def __init__(self, subparsers):
+        """Initialize argparse subparsers"""
+
+        parser = subparsers.add_parser("rest",
+                                       formatter_class=argparse.RawTextHelpFormatter,
+                                       description=REST_DESC)
+
+        parser.add_argument("--enable-lineno",  action="store_true",
+                            help="enable lineno")
+        parser.add_argument("--raw", action="store_true",
+                            help="output text as contained in the ABI files. "
+                                 "It not used, output will contain dynamically"
+                                 " generated cross references when possible.")
+        parser.add_argument("--no-file", action="store_true",
+                            help="Don't the files section")
+        parser.add_argument("--show-hints", help="Show-hints")
+
+        parser.set_defaults(func=self.run)
+
+    def run(self, args):
+        """Run subparser"""
+
+        parser = AbiParser(args.dir, debug=args.debug)
+        parser.parse_abi()
+        parser.check_issues()
+
+        for t in parser.doc(args.raw, not args.no_file):
+            if args.enable_lineno:
+                print (f".. LINENO {t[1]}#{t[2]}\n\n")
+
+            print(t[0])
+
+class AbiValidate:
+    """Initialize an argparse subparser for ABI validation"""
+
+    def __init__(self, subparsers):
+        """Initialize argparse subparsers"""
+
+        parser = subparsers.add_parser("validate",
+                                       formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+                                       description="list events")
+
+        parser.set_defaults(func=self.run)
+
+    def run(self, args):
+        """Run subparser"""
+
+        parser = AbiParser(args.dir, debug=args.debug)
+        parser.parse_abi()
+        parser.check_issues()
+
+
+class AbiSearch:
+    """Initialize an argparse subparser for ABI search"""
+
+    def __init__(self, subparsers):
+        """Initialize argparse subparsers"""
+
+        parser = subparsers.add_parser("search",
+                                       formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+                                       description="Search ABI using a regular expression")
+
+        parser.add_argument("expression",
+                            help="Case-insensitive search pattern for the ABI symbol")
+
+        parser.set_defaults(func=self.run)
+
+    def run(self, args):
+        """Run subparser"""
+
+        parser = AbiParser(args.dir, debug=args.debug)
+        parser.parse_abi()
+        parser.search_symbols(args.expression)
+
+UNDEFINED_DESC="""
+Check undefined ABIs on local machine.
+
+Read sysfs devnodes and check if the devnodes there are defined inside
+ABI documentation.
+
+The search logic tries to minimize the number of regular expressions to
+search per each symbol.
+
+By default, it runs on a single CPU, as Python support for CPU threads
+is still experimental, and multi-process runs on Python is very slow.
+
+On experimental tests, if the number of ABI symbols to search per devnode
+is contained on a limit of ~150 regular expressions, using a single CPU
+is a lot faster than using multiple processes. However, if the number of
+regular expressions to check is at the order of ~30000, using multiple
+CPUs speeds up the check.
+"""
+
+class AbiUndefined:
+    """
+    Initialize an argparse subparser for logic to check undefined ABI at
+    the current machine's sysfs
+    """
+
+    def __init__(self, subparsers):
+        """Initialize argparse subparsers"""
+
+        parser = subparsers.add_parser("undefined",
+                                       formatter_class=argparse.RawTextHelpFormatter,
+                                       description=UNDEFINED_DESC)
+
+        parser.add_argument("-S", "--sysfs-dir", default="/sys",
+                            help="directory where sysfs is mounted")
+        parser.add_argument("-s", "--search-string",
+                            help="search string regular expression to limit symbol search")
+        parser.add_argument("-H", "--show-hints", action="store_true",
+                            help="Hints about definitions for missing ABI symbols.")
+        parser.add_argument("-j", "--jobs", "--max-workers", type=int, default=1,
+                            help="If bigger than one, enables multiprocessing.")
+        parser.add_argument("-c", "--max-chunk-size", type=int, default=50,
+                            help="Maximum number of chunk size")
+        parser.add_argument("-f", "--found", action="store_true",
+                            help="Also show found items. "
+                                 "Helpful to debug the parser."),
+        parser.add_argument("-d", "--dry-run", action="store_true",
+                            help="Don't actually search for undefined. "
+                                 "Helpful to debug the parser."),
+
+        parser.set_defaults(func=self.run)
+
+    def run(self, args):
+        """Run subparser"""
+
+        abi = AbiRegex(args.dir, debug=args.debug,
+                       search_string=args.search_string)
+
+        abi_symbols = SystemSymbols(abi=abi, hints=args.show_hints,
+                                    sysfs=args.sysfs_dir)
+
+        abi_symbols.check_undefined_symbols(dry_run=args.dry_run,
+                                            found=args.found,
+                                            max_workers=args.jobs,
+                                            chunk_size=args.max_chunk_size)
+
+
+def main():
+    """Main program"""
+
+    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
+
+    parser.add_argument("-d", "--debug", type=int, default=0, help="debug level")
+    parser.add_argument("-D", "--dir", default=ABI_DIR, help=DEBUG_HELP)
+
+    subparsers = parser.add_subparsers()
+
+    AbiRest(subparsers)
+    AbiValidate(subparsers)
+    AbiSearch(subparsers)
+    AbiUndefined(subparsers)
+
+    args = parser.parse_args()
+
+    if args.debug:
+        level = logging.DEBUG
+    else:
+        level = logging.INFO
+
+    logging.basicConfig(level=level, format="[%(levelname)s] %(message)s")
+
+    if "func" in args:
+        args.func(args)
+    else:
+        sys.exit(f"Please specify a valid command for {sys.argv[0]}")
+
+
+# Call main method
+if __name__ == "__main__":
+    main()
diff --git a/tools/docs/get_feat.py b/tools/docs/get_feat.py
new file mode 100755
index 000000000000..2b5155a1f134
--- /dev/null
+++ b/tools/docs/get_feat.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0911,R0912,R0914,R0915
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+
+"""
+Parse the Linux Feature files and produce a ReST book.
+"""
+
+import argparse
+import os
+import subprocess
+import sys
+
+from pprint import pprint
+
+LIB_DIR = "../../tools/lib/python"
+SRC_DIR = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
+
+from feat.parse_features import ParseFeature                # pylint: disable=C0413
+
+SRCTREE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../..")
+DEFAULT_DIR = "Documentation/features"
+
+
+class GetFeature:
+    """Command line front end: argparse setup and subcommand handlers"""
+
+    @staticmethod
+    def get_current_arch():
+        """Detects the current architecture"""
+
+        proc = subprocess.run(["uname", "-m"], check=True,
+                              capture_output=True, text=True)
+
+        arch = proc.stdout.strip()
+        if arch in ["x86_64", "i386"]:
+            arch = "x86"
+        elif arch == "s390x":
+            arch = "s390"
+
+        return arch
+
+    def run_parser(self, args):
+        """Execute the feature parser"""
+
+        feat = ParseFeature(args.directory, args.debug, args.enable_fname)
+        data = feat.parse()
+
+        if args.debug > 2:
+            pprint(data)
+
+        return feat
+
+    def run_rest(self, args):
+        """
+        Generate tables in ReST format. Three types of tables are
+        supported, depending on the calling arguments:
+
+        - neither feature nor arch is passed: generates a full matrix;
+        - arch provided: generates a table of features for the
+          given architecture, optionally filtered by feature;
+        - only feature provided: generates a table with feature details,
+          showing on which architectures it is implemented.
+        """
+
+        feat = self.run_parser(args)
+
+        if args.arch:
+            rst = feat.output_arch_table(args.arch, args.feat)
+        elif args.feat:
+            rst = feat.output_feature(args.feat)
+        else:
+            rst = feat.output_matrix()
+
+        print(rst)
+
+    def run_current(self, args):
+        """
+        Instead of using a --arch parameter, get feature for the current
+        architecture.
+        """
+
+        args.arch = self.get_current_arch()
+
+        self.run_rest(args)
+
+    def run_list(self, args):
+        """
+        Generate a list of features for a given architecture, in a format
+        parseable by other scripts. The output format is not ReST.
+        """
+
+        if not args.arch:
+            args.arch = self.get_current_arch()
+
+        feat = self.run_parser(args)
+        msg = feat.list_arch_features(args.arch, args.feat)
+
+        print(msg)
+
+    def parse_arch(self, parser):
+        """Add a --arch parsing argument"""
+
+        parser.add_argument("--arch",
+                            help="Output features for an specific"
+                                 " architecture, optionally filtering for a "
+                                 "single specific feature.")
+
+    def parse_feat(self, parser):
+        """Add a --feat parsing argument"""
+
+        parser.add_argument("--feat", "--feature",
+                            help="Output features for a single specific "
+                                  "feature.")
+
+
+    def current_args(self, subparsers):
+        """Implement current argparse subparser"""
+
+        parser = subparsers.add_parser("current",
+                                       formatter_class=argparse.RawTextHelpFormatter,
+                                       description="Output table in ReST "
+                                                   "compatible ASCII format "
+                                                   "with features for this "
+                                                   "machine's architecture")
+
+        self.parse_feat(parser)
+        parser.set_defaults(func=self.run_current)
+
+    def rest_args(self, subparsers):
+        """Implement rest argparse subparser"""
+
+        parser = subparsers.add_parser("rest",
+                                       formatter_class=argparse.RawTextHelpFormatter,
+                                       description="Output table(s) in ReST "
+                                                   "compatible ASCII format "
+                                                   "with features in ReST "
+                                                   "markup language. The "
+                                                   "output is affected by "
+                                                   "--arch or --feat/--feature"
+                                                   " flags.")
+
+        self.parse_arch(parser)
+        self.parse_feat(parser)
+        parser.set_defaults(func=self.run_rest)
+
+    def list_args(self, subparsers):
+        """Implement list argparse subparser"""
+
+        parser = subparsers.add_parser("list",
+                                       formatter_class=argparse.RawTextHelpFormatter,
+                                       description="List features for this "
+                                                   "machine's architecture, "
+                                                   "using an easier to parse "
+                                                   "format. The output is "
+                                                   "affected by --arch flag.")
+
+        self.parse_arch(parser)
+        self.parse_feat(parser)
+        parser.set_defaults(func=self.run_list)
+
+    def validate_args(self, subparsers):
+        """Implement validate argparse subparser"""
+
+        parser = subparsers.add_parser("validate",
+                                       formatter_class=argparse.RawTextHelpFormatter,
+                                       description="Validate the contents of "
+                                                   "the files under "
+                                                   f"{DEFAULT_DIR}.")
+
+        parser.set_defaults(func=self.run_parser)
+
+    def parser(self):
+        """
+        Build the argparse parser and subparsers; return the parsed args
+        """
+        parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
+
+        parser.add_argument("-d", "--debug", action="count", default=0,
+                            help="Put the script in verbose mode, useful for "
+                                 "debugging. Can be called multiple times, to "
+                                 "increase verbosity.")
+
+        parser.add_argument("--directory", "--dir", default=DEFAULT_DIR,
+                            help="Changes the location of the Feature files. "
+                                 f"By default, it uses the {DEFAULT_DIR} "
+                                 "directory.")
+
+        parser.add_argument("--enable-fname", action="store_true",
+                            help="Prints the file name of the feature files. "
+                                 "This can be used in order to track "
+                                 "dependencies during documentation build.")
+
+        subparsers = parser.add_subparsers()
+
+        self.current_args(subparsers)
+        self.rest_args(subparsers)
+        self.list_args(subparsers)
+        self.validate_args(subparsers)
+
+        args = parser.parse_args()
+
+        return args
+
+
+def main():
+    """Parse the command line and run the selected subcommand"""
+
+    args = GetFeature().parser()
+
+    # "func" is only set by the subparsers, so its absence means that
+    # no subcommand was given on the command line
+    if "func" not in args:
+        sys.exit(f"Please specify a valid command for {sys.argv[0]}")
+
+    args.func(args)
+
+
+# Call main() only when executed as a script, not when imported
+if __name__ == "__main__":
+    main()
diff --git a/tools/docs/list-arch.sh b/tools/docs/list-arch.sh
new file mode 100755
index 000000000000..96fe83b7058b
--- /dev/null
+++ b/tools/docs/list-arch.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Small script that visualizes the kernel feature support status
+# of an architecture.
+#
+# (If no arguments are given then it will print the host architecture's status.)
+
+ARCH=${1:-$(uname -m | sed 's/x86_64/x86/' | sed 's/i386/x86/' | sed 's/s390x/s390/')}
+
+"$(dirname "$0")"/get_feat.py list --arch "$ARCH"
diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py
index bfa4e46a53e3..436acea4c6ca 100755
--- a/tools/docs/parse-headers.py
+++ b/tools/docs/parse-headers.py
@@ -24,10 +24,13 @@ The optional ``FILE_RULES`` contains a set of rules like:
     replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
 """
 
-import argparse
+import argparse, sys
+import os.path
 
-from lib.parse_data_structs import ParseDataStructs
-from lib.enrich_formatter import EnrichFormatter
+src_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(src_dir, '../lib/python'))
+from kdoc.parse_data_structs import ParseDataStructs
+from kdoc.enrich_formatter import EnrichFormatter
 
 def main():
     """Main function"""
@@ -47,10 +50,7 @@ def main():
     args = parser.parse_args()
 
     parser = ParseDataStructs(debug=args.debug)
-    parser.parse_file(args.file_in)
-
-    if args.file_rules:
-        parser.process_exceptions(args.file_rules)
+    parser.parse_file(args.file_in, args.file_rules)
 
     parser.debug_print()
     parser.write_output(args.file_in, args.file_out, args.toc)
diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper
new file mode 100755
index 000000000000..7a5fcef25429
--- /dev/null
+++ b/tools/docs/sphinx-build-wrapper
@@ -0,0 +1,864 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+#
+# pylint: disable=R0902, R0912, R0913, R0914, R0915, R0917, C0103
+#
+# Converted from docs Makefile and parallel-wrapper.sh, both under
+# GPLv2, copyrighted since 2008 by the following authors:
+#
+#    Akira Yokosawa <akiyks@gmail.com>
+#    Arnd Bergmann <arnd@arndb.de>
+#    Breno Leitao <leitao@debian.org>
+#    Carlos Bilbao <carlos.bilbao@amd.com>
+#    Dave Young <dyoung@redhat.com>
+#    Donald Hunter <donald.hunter@gmail.com>
+#    Geert Uytterhoeven <geert+renesas@glider.be>
+#    Jani Nikula <jani.nikula@intel.com>
+#    Jan Stancek <jstancek@redhat.com>
+#    Jonathan Corbet <corbet@lwn.net>
+#    Joshua Clayton <stillcompiling@gmail.com>
+#    Kees Cook <keescook@chromium.org>
+#    Linus Torvalds <torvalds@linux-foundation.org>
+#    Magnus Damm <damm+renesas@opensource.se>
+#    Masahiro Yamada <masahiroy@kernel.org>
+#    Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+#    Maxim Cournoyer <maxim.cournoyer@gmail.com>
+#    Peter Foley <pefoley2@pefoley.com>
+#    Randy Dunlap <rdunlap@infradead.org>
+#    Rob Herring <robh@kernel.org>
+#    Shuah Khan <shuahkh@osg.samsung.com>
+#    Thorsten Blum <thorsten.blum@toblux.com>
+#    Tomas Winkler <tomas.winkler@intel.com>
+
+
+"""
+Sphinx build wrapper that handles Kernel-specific business rules:
+
+- it gets the Kernel build environment vars;
+- it determines what's the best parallelism;
+- it handles SPHINXDIRS
+
+This tool ensures that MIN_PYTHON_VERSION is satisfied. If the running
+version is below that, it searches for a newer Python version. If found,
+it re-runs itself using the newer version.
+"""
+
+import argparse
+import locale
+import os
+import re
+import shlex
+import shutil
+import subprocess
+import sys
+
+from concurrent import futures
+from glob import glob
+
+
+LIB_DIR = "../lib/python"
+SRC_DIR = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
+
+from kdoc.python_version import PythonVersion
+from kdoc.latex_fonts import LatexFontChecker
+from jobserver import JobserverExec         # pylint: disable=C0413,C0411,E0401
+
+#
+#  Some constants: defaults and the map from doc targets to Sphinx builders
+#
+VENV_DEFAULT = "sphinx_latest"
+MIN_PYTHON_VERSION = PythonVersion("3.7").version
+PAPER = ["", "a4", "letter"]
+
+TARGETS = {
+    "cleandocs":     { "builder": "clean" },
+    "linkcheckdocs": { "builder": "linkcheck" },
+    "htmldocs":      { "builder": "html" },
+    "epubdocs":      { "builder": "epub",    "out_dir": "epub" },
+    "texinfodocs":   { "builder": "texinfo", "out_dir": "texinfo" },
+    "infodocs":      { "builder": "texinfo", "out_dir": "texinfo" },
+    "mandocs":       { "builder": "man",     "out_dir": "man" },
+    "latexdocs":     { "builder": "latex",   "out_dir": "latex" },
+    "pdfdocs":       { "builder": "latex",   "out_dir": "latex" },
+    "xmldocs":       { "builder": "xml",     "out_dir": "xml" },
+}
+
+
+#
+# SphinxBuilder class
+#
+
+class SphinxBuilder:
+    """
+    Handles a sphinx-build target, adding needed arguments to build
+    with the Kernel.
+    """
+
+    def get_path(self, path, use_cwd=False, abs_path=False):
+        """
+        Ancillary routine to handle paths the right way, as shell does.
+
+        It first expands "~" and "~user". Then, if path is not absolute, join
+        it to cwd (use_cwd) or self.srctree. If abs_path is set, use abspath.
+        """
+
+        path = os.path.expanduser(path)
+        if not path.startswith("/"):
+            if use_cwd:
+                base = os.getcwd()
+            else:
+                base = self.srctree
+
+            path = os.path.join(base, path)
+
+        if abs_path:
+            return os.path.abspath(path)
+
+        return path
+
+    def check_rust(self):
+        """
+        Set self.rustdoc to True when <srctree>/.config has CONFIG_RUST=y|m.
+        A missing or unreadable .config leaves self.rustdoc as False.
+        """
+        self.rustdoc = False
+
+        config = os.path.join(self.srctree, ".config")
+        if not os.path.isfile(config):
+            return
+
+        re_rust = re.compile(r"CONFIG_RUST=(m|y)")
+
+        try:
+            with open(config, "r", encoding="utf-8") as fp:
+                for line in fp:
+                    if re_rust.match(line):
+                        self.rustdoc = True
+                        return
+
+        except OSError as e:
+            print(f"Failed to open {config}: {e}", file=sys.stderr)
+
+    def get_sphinx_extra_opts(self, n_jobs):
+        """
+        Set self.n_jobs and self.sphinxopts from the "-j" command line
+        argument (n_jobs) and from the SPHINXOPTS env var.
+
+        The number of jobs can come from different places (1-2 handled here):
+
+        1) It can be passed via "-j" argument;
+        2) The SPHINXOPTS="-j8" env var may have "-j";
+        3) if called via GNU make, -j specifies the desired number of jobs;
+           with GNU make, this number is available via POSIX jobserver;
+        4) if none of the above is available, it should default to "-jauto",
+           and let sphinx decide the best value.
+        """
+
+        #
+        # SPHINXOPTS env var, if used, contains extra arguments to be used
+        # by sphinx-build time. Among them, it may contain sphinx verbosity
+        # and desired number of parallel jobs.
+        #
+        parser = argparse.ArgumentParser()
+        parser.add_argument('-j', '--jobs', type=int)
+        parser.add_argument('-q', '--quiet', action='store_true')
+
+        #
+        # Other sphinx-build arguments go as-is, so place them
+        # at self.sphinxopts, using shell parser
+        #
+        sphinxopts = shlex.split(os.environ.get("SPHINXOPTS", ""))
+
+        #
+        # Build a list of sphinx args, honoring verbosity here if specified
+        #
+
+        verbose = self.verbose
+        sphinx_args, self.sphinxopts = parser.parse_known_args(sphinxopts)
+        if sphinx_args.quiet is True:
+            verbose = False
+
+        #
+        # If the user explicitly sets "-j" at command line, use it.
+        # Otherwise, pick it from SPHINXOPTS args
+        #
+        if n_jobs:
+            self.n_jobs = n_jobs
+        elif sphinx_args.jobs:
+            self.n_jobs = sphinx_args.jobs
+        else:
+            self.n_jobs = None
+
+        if not verbose:
+            self.sphinxopts += ["-q"]
+
+    def __init__(self, builddir, venv=None, verbose=False, n_jobs=None,
+                 interactive=None):
+        """Set up build state from the arguments and from Kbuild env variables"""
+        self.venv = venv
+        self.verbose = None
+
+        #
+        # Normal variables passed from Kernel's makefile
+        #
+        self.kernelversion = os.environ.get("KERNELVERSION", "unknown")
+        self.kernelrelease = os.environ.get("KERNELRELEASE", "unknown")
+        self.pdflatex = os.environ.get("PDFLATEX", "xelatex")
+
+        #
+        # Kernel main Makefile defines a PYTHON3 variable whose default is
+        # "python3". When set to a different value, it allows running a
+        # different version than the default official python3 package.
+        # Several distros package python3xx-sphinx packages with newer
+        # versions of Python and sphinx-build.
+        #
+        # Honor such variable different than default
+        #
+        self.python = os.environ.get("PYTHON3")
+        if self.python == "python3":
+            self.python = None
+
+        if not interactive:
+            self.latexopts = os.environ.get("LATEXOPTS", "-interaction=batchmode -no-shell-escape")
+        else:
+            self.latexopts = os.environ.get("LATEXOPTS", "")
+
+        if not verbose:
+            verbose = bool(os.environ.get("KBUILD_VERBOSE", "") != "")
+
+        if verbose is not None:
+            self.verbose = verbose
+
+        #
+        # Source tree directory. This needs to be at os.environ, as
+        # Sphinx extensions use it
+        #
+        self.srctree = os.environ.get("srctree")
+        if not self.srctree:
+            self.srctree = "."
+            os.environ["srctree"] = self.srctree
+
+        #
+        # Now that we can expand srctree, get other directories as well
+        #
+        self.sphinxbuild = os.environ.get("SPHINXBUILD", "sphinx-build")
+        self.kerneldoc = self.get_path(os.environ.get("KERNELDOC",
+                                                      "scripts/kernel-doc.py"))
+        self.builddir = self.get_path(builddir, use_cwd=True, abs_path=True)
+
+        #
+        # Get directory locations for LaTeX build toolchain
+        #
+        self.pdflatex_cmd = shutil.which(self.pdflatex)
+        self.latexmk_cmd = shutil.which("latexmk")
+
+        self.env = os.environ.copy()
+
+        self.get_sphinx_extra_opts(n_jobs)
+
+        self.check_rust()
+
+        #
+        # If venv command line argument is specified, run Sphinx from venv
+        #
+        if venv:
+            bin_dir = os.path.join(venv, "bin")
+            if not os.path.isfile(os.path.join(bin_dir, "activate")):
+                sys.exit(f"Venv {venv} not found.")
+
+            # "activate" virtual env
+            self.env["PATH"] = bin_dir + ":" + self.env["PATH"]
+            self.env["VIRTUAL_ENV"] = venv
+            if "PYTHONHOME" in self.env:
+                del self.env["PYTHONHOME"]
+            print(f"Setting venv to {venv}")
+
+    def run_sphinx(self, sphinx_build, build_args, *args, **pwargs):
+        """
+        Executes sphinx-build using current python3 command.
+
+        When calling via GNU make, POSIX jobserver is used to tell how
+        many jobs are still available from a job pool. Claim all remaining
+        jobs, as we don't want sphinx-build to run in parallel with other
+        jobs.
+
+        Despite that, the user may actually force a different value than
+        the number of available jobs via command line.
+
+        The "with" block ensures that the claimed jobs are freed once the
+        subprocess finishes. Returns the sphinx-build exit status.
+        """
+
+        with JobserverExec() as jobserver:
+            if jobserver.claim:
+                #
+                # when GNU make is used, claim available jobs from jobserver
+                #
+                n_jobs = str(jobserver.claim)
+            else:
+                #
+                # Otherwise, let sphinx decide by default
+                #
+                n_jobs = "auto"
+
+            #
+            # If explicitly requested via command line, override default
+            #
+            if self.n_jobs:
+                n_jobs = str(self.n_jobs)
+
+            #
+            # We can't simply call python3 sphinx-build, as OpenSUSE
+            # Tumbleweed uses an ELF binary file (/usr/bin/alts) to switch
+            # between different versions of sphinx-build. So, only call it
+            # prepending "python3.xx" when PYTHON3 variable is not default.
+            #
+            if self.python:
+                cmd = [self.python]
+            else:
+                cmd = []
+
+            cmd += [sphinx_build]
+            cmd += [f"-j{n_jobs}"]
+            cmd += build_args
+            cmd += self.sphinxopts
+
+            if self.verbose:
+                print(" ".join(cmd))
+
+            return subprocess.call(cmd, *args, **pwargs)
+
+    def handle_html(self, css, output_dir):
+        """
+        Extra steps for HTML and epub output.
+
+        If css is set, copy it to output_dir/_static. If self.rustdoc is
+        set, run "make rustdoc". Copy and rustdoc failures are only warned.
+        """
+
+        if css:
+            css = self.get_path(css)
+
+            static_dir = os.path.join(output_dir, "_static")
+            os.makedirs(static_dir, exist_ok=True)
+
+            try:
+                shutil.copy2(css, static_dir)
+            except OSError as e:
+                print(f"Warning: Failed to copy CSS: {e}", file=sys.stderr)
+
+        if self.rustdoc:
+            print("Building rust docs")
+            if "MAKE" in self.env:
+                cmd = [self.env["MAKE"]]
+            else:
+                cmd = ["make", "LLVM=1"]
+
+            cmd += ["rustdoc"]
+            if self.verbose:
+                print(" ".join(cmd))
+
+            # OSError covers a make binary that can't be executed at all
+            try:
+                subprocess.run(cmd, check=True)
+            except (OSError, subprocess.CalledProcessError) as e:
+                print(f"Ignored errors when building rustdoc: {e}. Is RUST enabled?",
+                      file=sys.stderr)
+
+    def build_pdf_file(self, latex_cmd, from_dir, path):
+        """Run latex_cmd on path from from_dir; True if it exits with zero"""
+        cmd = latex_cmd + [path]
+        try:
+            subprocess.run(cmd, check=True, cwd=from_dir, env=self.env)
+        except subprocess.CalledProcessError:
+            return False
+
+        return True
+
+    def pdf_parallel_build(self, tex_suffix, latex_cmd, tex_files, n_jobs):
+        """
+        Build PDF files in parallel if possible.
+
+        Returns (builds, build_failed, max_len): builds maps each output
+        name to its status (successes are only recorded when verbose).
+        """
+        builds = {}
+        build_failed = False
+        max_len = 0
+        has_tex = False
+
+        #
+        # LaTeX PDF error code is almost useless for us:
+        # any warning makes it non-zero. For kernel doc builds it always returns
+        # non-zero even when build succeeds. So, let's do the best next thing:
+        # Ignore build errors. At the end, check if all PDF files were built,
+        # printing a summary with the built ones and returning 0 if all of
+        # them were actually built.
+        #
+        with futures.ThreadPoolExecutor(max_workers=n_jobs) as executor:
+            jobs = {}
+
+            for from_dir, pdf_dir, entry in tex_files:
+                name = entry.name
+                if not name.endswith(tex_suffix):
+                    continue
+
+                name = name[:-len(tex_suffix)]
+                has_tex = True
+                future = executor.submit(self.build_pdf_file, latex_cmd,
+                                         from_dir, entry.path)
+                jobs[future] = (from_dir, pdf_dir, name)
+
+            for future in futures.as_completed(jobs):
+                from_dir, pdf_dir, name = jobs[future]
+                pdf_name = name + ".pdf"
+                pdf_from = os.path.join(from_dir, pdf_name)
+                pdf_to = os.path.join(pdf_dir, pdf_name)
+                out_name = os.path.relpath(pdf_to, self.builddir)
+                max_len = max(max_len, len(out_name))
+
+                # OSError: LaTeX failed to start or the PDF couldn't be moved
+                try:
+                    success = future.result()
+                    if success and os.path.exists(pdf_from):
+                        os.rename(pdf_from, pdf_to)
+
+                        # if verbose, get the name of built PDF file
+                        if self.verbose:
+                            builds[out_name] = "SUCCESS"
+                    else:
+                        builds[out_name] = "FAILED"
+                        build_failed = True
+                except (OSError, futures.Error) as e:
+                    builds[out_name] = f"FAILED ({repr(e)})"
+                    build_failed = True
+
+        #
+        # Handle case where no .tex files were found
+        #
+        if not has_tex:
+            out_name = "LaTeX files"
+            max_len = max(max_len, len(out_name))
+            builds[out_name] = "FAILED: no .tex files were generated"
+            build_failed = True
+
+        return builds, build_failed, max_len
+
+    def handle_pdf(self, output_dirs, deny_vf):
+        """
+        Extra steps for PDF output.
+
+        As PDF is handled via a LaTeX output, after building the .tex file,
+        a new build is needed to create the PDF output from the latex
+        directory.
+
+        output_dirs lists the directories with the generated .tex files;
+        a "../pdf" directory is created relative to each of them.
+        deny_vf, when it is an existing directory, is passed to the LaTeX
+        build as XDG_CONFIG_HOME.
+
+        Exits with an error message if not all PDF files could be built.
+        """
+        builds = {}
+        max_len = 0
+        tex_suffix = ".tex"
+        tex_files = []
+
+        #
+        # Since early 2024, Fedora and openSUSE tumbleweed have started
+        # deploying variable-font format of "Noto CJK", causing LaTeX
+        # to break with CJK. Work around it, by denying the variable font
+        # usage during xelatex build by passing the location of a config
+        # file with a deny list.
+        #
+        # See tools/lib/python/kdoc/latex_fonts.py for more details.
+        #
+        if deny_vf:
+            deny_vf = os.path.expanduser(deny_vf)
+            if os.path.isdir(deny_vf):
+                self.env["XDG_CONFIG_HOME"] = deny_vf
+
+        #
+        # The LaTeX command is the same for all directories: build it once
+        #
+        if self.latexmk_cmd:
+            latex_cmd = [self.latexmk_cmd, f"-{self.pdflatex}"]
+        else:
+            latex_cmd = [self.pdflatex]
+
+        latex_cmd.extend(shlex.split(self.latexopts))
+
+        for from_dir in output_dirs:
+            pdf_dir = os.path.join(from_dir, "../pdf")
+            os.makedirs(pdf_dir, exist_ok=True)
+
+            # Get a list of tex files to process
+            with os.scandir(from_dir) as it:
+                for entry in it:
+                    if entry.name.endswith(tex_suffix):
+                        tex_files.append((from_dir, pdf_dir, entry))
+
+        #
+        # When using make, jobserver.claim is the number of jobs that were
+        # used with "-j" and that aren't used by other make targets
+        #
+        with JobserverExec() as jobserver:
+            #
+            # When using make, the number of jobs comes from the POSIX
+            # jobserver. Otherwise (build from command line), serialize
+            # by default, except if the user explicitly sets it.
+            #
+            n_jobs = 1
+
+            #
+            # Handle the case when a parameter is passed via command line,
+            # using it as default, if jobserver doesn't claim anything
+            #
+            if self.n_jobs:
+                try:
+                    n_jobs = int(self.n_jobs)
+                except ValueError:
+                    pass
+
+            if jobserver.claim:
+                n_jobs = jobserver.claim
+
+            builds, build_failed, max_len = self.pdf_parallel_build(tex_suffix,
+                                                                    latex_cmd,
+                                                                    tex_files,
+                                                                    n_jobs)
+
+        #
+        # In verbose mode, print a summary with the build results per file.
+        # Otherwise, print a single line with all failures, if any.
+        # On both cases, return code 1 indicates build failures,
+        #
+        if self.verbose:
+            msg = "Summary"
+            msg += "\n" + "=" * len(msg)
+            print()
+            print(msg)
+
+            for pdf_name, pdf_file in builds.items():
+                print(f"{pdf_name:<{max_len}}: {pdf_file}")
+
+            print()
+            if build_failed:
+                msg = LatexFontChecker().check()
+                if msg:
+                    print(msg)
+
+                sys.exit("Error: not all PDF files were created.")
+
+        elif build_failed:
+            n_failures = len(builds)
+            failures = ", ".join(builds.keys())
+
+            msg = LatexFontChecker().check()
+            if msg:
+                print(msg)
+
+            sys.exit(f"Error: Can't build {n_failures} PDF file(s): {failures}")
+
+    def handle_info(self, output_dirs):
+        """
+        Post-processing for Info output.
+
+        An additional "make info" run is needed from inside each texinfo
+        output directory; the program exits on the first one that fails.
+        """
+
+        for texinfo_dir in output_dirs:
+            try:
+                subprocess.run(["make", "info"], check=True, cwd=texinfo_dir)
+            except subprocess.CalledProcessError as err:
+                sys.exit(f"Error generating info docs: {err}")
+
+    def handle_man(self, kerneldoc, docs_dir, src_dir, output_dir):
+        """
+        Split the output of "kernel-doc -m" into one man page per .TH header
+        """
+
+        re_kernel_doc = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
+        re_man = re.compile(r'^\.TH "[^"]*" (\d+) "([^"]*)"')
+
+        if docs_dir == src_dir:
+            #
+            # Pick the entire set of kernel-doc markups from the entire tree
+            #
+            kdoc_files = set([self.srctree])
+        else:
+            kdoc_files = set()
+
+            for fname in glob(os.path.join(src_dir, "**"), recursive=True):
+                if os.path.isfile(fname) and fname.endswith(".rst"):
+                    with open(fname, "r", encoding="utf-8") as in_fp:
+                        data = in_fp.read()
+
+                    for line in data.split("\n"):
+                        match = re_kernel_doc.match(line)
+                        if match:
+                            if os.path.isfile(match.group(1)):
+                                kdoc_files.add(match.group(1))
+
+        if not kdoc_files:
+                sys.exit(f"Directory {src_dir} doesn't contain kernel-doc tags")
+
+        cmd = [ kerneldoc, "-m" ] + sorted(kdoc_files)
+        try:
+            if self.verbose:
+                print(" ".join(cmd))
+
+            result = subprocess.run(cmd, stdout=subprocess.PIPE, text= True)
+
+            if result.returncode:
+                print(f"Warning: kernel-doc returned {result.returncode} warnings")
+
+        except (OSError, ValueError, subprocess.SubprocessError) as e:
+            sys.exit(f"Failed to create man pages for {src_dir}: {repr(e)}")
+
+        fp = None
+        try:
+            for line in result.stdout.split("\n"):
+                match = re_man.match(line)
+                if not match:
+                    if fp:
+                        fp.write(line + '\n')
+                    continue
+
+                if fp:
+                    fp.close()
+
+                fname = f"{output_dir}/{match.group(2)}.{match.group(1)}"
+
+                if self.verbose:
+                    print(f"Creating {fname}")
+                fp = open(fname, "w", encoding="utf-8")
+                fp.write(line + '\n')
+        finally:
+            if fp:
+                fp.close()
+
+    def cleandocs(self, builder):           # pylint: disable=W0613
+        """Remove the whole self.builddir tree, ignoring errors; builder is unused"""
+        shutil.rmtree(self.builddir, ignore_errors=True)
+
+    def build(self, target, sphinxdirs=None,
+              theme=None, css=None, paper=None, deny_vf=None,
+              skip_sphinx=False):
+        """
+        Build documentation using Sphinx. This is the core function of this
+        module. It prepares all arguments required by sphinx-build.
+
+        target selects the TARGETS entry (builder and output subdirectory).
+        sphinxdirs may be a list of strings (each possibly holding several
+        whitespace-separated directories) or a single such string; when
+        unset, the SPHINXDIRS environment variable or "." is used.
+
+        Exits via sys.exit() when a required tool is missing or a build fails.
+        """
+
+        builder = TARGETS[target]["builder"]
+        out_dir = TARGETS[target].get("out_dir", "")
+
+        # Cleandocs doesn't require sphinx-build
+        if target == "cleandocs":
+            self.cleandocs(builder)
+            return
+
+        if theme:
+            os.environ["DOCS_THEME"] = theme
+
+        # Other targets require sphinx-build, so check if it exists
+        if not skip_sphinx:
+            sphinxbuild = shutil.which(self.sphinxbuild, path=self.env["PATH"])
+            if not sphinxbuild and target != "mandocs":
+                sys.exit(f"Error: {self.sphinxbuild} not found in PATH.\n")
+
+        if target == "pdfdocs":
+            if not self.pdflatex_cmd and not self.latexmk_cmd:
+                sys.exit("Error: pdflatex or latexmk required for PDF generation")
+
+        docs_dir = os.path.abspath(os.path.join(self.srctree, "Documentation"))
+
+        # Fill in base arguments for Sphinx build
+        kerneldoc = self.kerneldoc
+        if kerneldoc.startswith(self.srctree):
+            kerneldoc = os.path.relpath(kerneldoc, self.srctree)
+
+        args = [ "-b", builder, "-c", docs_dir ]
+
+        if builder == "latex":
+            if not paper:
+                paper = PAPER[1]
+
+            args.extend(["-D", f"latex_elements.papersize={paper}paper"])
+
+        if self.rustdoc:
+            args.extend(["-t", "rustdoc"])
+
+        if not sphinxdirs:
+            sphinxdirs = os.environ.get("SPHINXDIRS", ".")
+
+        #
+        # The sphinx-build tool has a bug: internally, it tries to set
+        # locale with locale.setlocale(locale.LC_ALL, ''). This causes a
+        # crash if language is not set. Detect and fix it.
+        #
+        try:
+            locale.setlocale(locale.LC_ALL, '')
+        except locale.Error:
+            self.env["LC_ALL"] = "C"
+
+        # sphinxdirs can be a list or a whitespace-separated string. Wrap a
+        # bare string, as iterating it directly would yield single characters
+        if isinstance(sphinxdirs, str):
+            sphinxdirs = [sphinxdirs]
+        sphinxdirs_list = []
+        for sphinxdir in sphinxdirs:
+            if isinstance(sphinxdir, list):
+                sphinxdirs_list += sphinxdir
+            else:
+                sphinxdirs_list += sphinxdir.split()
+
+        #
+        # Step 1:  Build each directory separately.
+        #
+        # This is not the best way of handling it, as cross-references between
+        # them will be broken, but this is what we've been doing since
+        # the beginning.
+        #
+        output_dirs = []
+        for sphinxdir in sphinxdirs_list:
+            src_dir = os.path.join(docs_dir, sphinxdir)
+            doctree_dir = os.path.join(self.builddir, ".doctrees")
+            output_dir = os.path.join(self.builddir, sphinxdir, out_dir)
+
+            # Make directory names canonical
+            src_dir = os.path.normpath(src_dir)
+            doctree_dir = os.path.normpath(doctree_dir)
+            output_dir = os.path.normpath(output_dir)
+
+            os.makedirs(doctree_dir, exist_ok=True)
+            os.makedirs(output_dir, exist_ok=True)
+
+            output_dirs.append(output_dir)
+
+            build_args = args + [
+                "-d", doctree_dir,
+                "-D", f"kerneldoc_bin={kerneldoc}",
+                "-D", f"version={self.kernelversion}",
+                "-D", f"release={self.kernelrelease}",
+                "-D", f"kerneldoc_srctree={self.srctree}",
+                src_dir,
+                output_dir,
+            ]
+
+            if target == "mandocs":
+                self.handle_man(kerneldoc, docs_dir, src_dir, output_dir)
+            elif not skip_sphinx:
+                try:
+                    result = self.run_sphinx(sphinxbuild, build_args,
+                                             env=self.env)
+
+                    if result:
+                        sys.exit(f"Build failed: return code: {result}")
+
+                except (OSError, ValueError, subprocess.SubprocessError) as e:
+                    sys.exit(f"Build failed: {repr(e)}")
+
+            #
+            # Ensure that each html/epub output will have needed static files
+            #
+            if target in ["htmldocs", "epubdocs"]:
+                self.handle_html(css, output_dir)
+
+        #
+        # Step 2: Some targets (PDF and info) require an extra step once
+        #         sphinx-build finishes
+        #
+        if target == "pdfdocs":
+            self.handle_pdf(output_dirs, deny_vf)
+        elif target == "infodocs":
+            self.handle_info(output_dirs)
+
+def jobs_type(value):
+    """
+    argparse "type" hook for -j: None passes through, "auto" (any case) is
+    returned lower-cased, and an integer string >= 1 is returned unchanged.
+    """
+    if value is None:
+        return None
+
+    if value.lower() == 'auto':
+        return 'auto'
+
+    try:
+        n_jobs = int(value)
+    except ValueError:
+        raise argparse.ArgumentTypeError(f"Must be 'auto' or positive integer, got {value}")  # pylint: disable=W0707
+    if n_jobs < 1:
+        raise argparse.ArgumentTypeError(f"Minimum jobs is 1, got {value}")
+    return value
+
+def main():
+    """
+    Command-line entry point. The only mandatory argument is the target;
+    every other option is optional and is handed either to the
+    SphinxBuilder constructor or to its build() method.
+    """
+    parser = argparse.ArgumentParser(description="Kernel documentation builder")
+
+    parser.add_argument("target", choices=list(TARGETS.keys()),
+                        help="Documentation target to build")
+    parser.add_argument("--sphinxdirs", nargs="+",
+                        help="Specific directories to build")
+    parser.add_argument("--builddir", default="output",
+                        help="Directory where the build output is placed")
+
+    parser.add_argument("--theme", help="Sphinx theme to use")
+
+    parser.add_argument("--css", help="Custom CSS file for HTML/EPUB")
+
+    parser.add_argument("--paper", choices=PAPER, default=PAPER[0],
+                        help="Paper size for LaTeX/PDF output")
+
+    parser.add_argument('--deny-vf',
+                        help="Configuration to deny variable fonts on pdf builds")
+
+    parser.add_argument("-v", "--verbose", action='store_true',
+                        help="place build in verbose mode")
+
+    parser.add_argument('-j', '--jobs', type=jobs_type,
+                        help="Sets number of jobs to use with sphinx-build")
+
+    parser.add_argument('-i', '--interactive', action='store_true',
+                        help="Change latex default to run in interactive mode")
+
+    parser.add_argument('-s', '--skip-sphinx-build', action='store_true',
+                        help="Skip sphinx-build step")
+
+    parser.add_argument("-V", "--venv", nargs='?', const=f'{VENV_DEFAULT}',
+                        default=None,
+                        help=f'If used, run Sphinx from a venv dir (default dir: {VENV_DEFAULT})')
+
+    args = parser.parse_args()
+
+    PythonVersion.check_python(MIN_PYTHON_VERSION, show_alternatives=True,
+                               bail_out=True)
+
+    builder = SphinxBuilder(builddir=args.builddir, venv=args.venv,
+                            verbose=args.verbose, n_jobs=args.jobs,
+                            interactive=args.interactive)
+
+    builder.build(args.target, sphinxdirs=args.sphinxdirs,
+                  theme=args.theme, css=args.css, paper=args.paper,
+                  deny_vf=args.deny_vf,
+                  skip_sphinx=args.skip_sphinx_build)
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/docs/sphinx-pre-install b/tools/docs/sphinx-pre-install
new file mode 100755
index 000000000000..965c9b093a41
--- /dev/null
+++ b/tools/docs/sphinx-pre-install
@@ -0,0 +1,1543 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2017-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+#
+# pylint: disable=C0103,C0114,C0115,C0116,C0301,C0302
+# pylint: disable=R0902,R0904,R0911,R0912,R0914,R0915,R1705,R1710,E1121
+
+# Note: this script requires at least Python 3.6 to run.
+# Don't add changes not compatible with it, it is meant to report
+# incompatible python versions.
+
+"""
+Dependency checker for Sphinx documentation Kernel build.
+
+This module provides tools to check for all required dependencies needed to
+build documentation using Sphinx, including system packages, Python modules
+and LaTeX packages for PDF generation.
+
+It detects packages for a subset of Linux distributions used by Kernel
+maintainers, showing hints and missing dependencies.
+
+The main class SphinxDependencyChecker handles the dependency checking logic
+and provides recommendations for installing missing packages. It supports both
+system package installations and Python virtual environments. By default,
+system package install is recommended.
+"""
+
+import argparse
+import locale
+import os
+import re
+import subprocess
+import sys
+from glob import glob
+import os.path
+
+src_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(src_dir, '../lib/python'))  # lets the kdoc import below resolve
+from kdoc.python_version import PythonVersion
+
+RECOMMENDED_VERSION = PythonVersion("3.4.3").version  # compared with the Sphinx version in check_sphinx()
+MIN_PYTHON_VERSION = PythonVersion("3.7").version
+
+
+class DepManager:
+    """
+    Manage package dependencies. There are three types of dependencies:
+
+    - System: dependencies required for docs build;
+    - Python: python dependencies for a native distro Sphinx install;
+    - PDF: dependencies needed by PDF builds.
+
+    Each dependency can be mandatory or optional. Not installing an optional
+    dependency won't break the build, but will cause degradation at the
+    docs output.
+    """
+
+    # Internal types of dependencies. Don't use them outside DepManager class.
+    _SYS_TYPE = 0
+    _PHY_TYPE = 1
+    _PDF_TYPE = 2
+
+    # Dependencies visible outside the class.
+    # The values are tuples with: (type, is_mandatory flag).
+    #
+    # Currently we're not using all optional dep types. Yet, we'll keep all
+    # possible combinations here. They're not many, and that makes easier
+    # if later needed and for the name() method below
+
+    SYSTEM_MANDATORY = (_SYS_TYPE, True)
+    PYTHON_MANDATORY = (_PHY_TYPE, True)
+    PDF_MANDATORY = (_PDF_TYPE, True)
+
+    SYSTEM_OPTIONAL = (_SYS_TYPE, False)
+    PYTHON_OPTIONAL = (_PHY_TYPE, False)
+    PDF_OPTIONAL = (_PDF_TYPE, False)
+
+    def __init__(self, pdf):
+        """
+        Initialize internal vars:
+
+        - missing: missing dependencies dict, mapping a distro-independent
+                   name of a missing dependency to its type.
+        - missing_pkg: ancillary dict containing missing dependencies in
+                       distro namespace, organized by type.
+        - need: total number of needed dependencies. Never cleaned.
+        - optional: total number of optional dependencies. Never cleaned.
+        - pdf: Is PDF support enabled?
+        """
+        self.missing = {}
+        self.missing_pkg = {}
+        self.need = 0
+        self.optional = 0
+        self.pdf = pdf
+
+    @staticmethod
+    def name(dtype):
+        """
+        Ancillary routine to output a warn/error message reporting
+        missing dependencies.
+        """
+        if dtype[0] == DepManager._SYS_TYPE:
+            msg = "build"
+        elif dtype[0] == DepManager._PHY_TYPE:
+            msg = "Python"
+        else:
+            msg = "PDF"
+
+        if dtype[1]:
+            return f"ERROR: {msg} mandatory deps missing"
+        else:
+            return f"Warning: {msg} optional deps missing"
+
+    @staticmethod
+    def is_optional(dtype):
+        """Ancillary routine to report if a dependency is optional"""
+        return not dtype[1]
+
+    @staticmethod
+    def is_pdf(dtype):
+        """Ancillary routine to report if a dependency is for PDF generation"""
+        if dtype[0] == DepManager._PDF_TYPE:
+            return True
+
+        return False
+
+    def add_package(self, package, dtype):
+        """
+        Add a package to the self.missing dictionary and count it as
+        optional or needed. Doesn't update missing_pkg.
+        """
+        is_optional = DepManager.is_optional(dtype)
+        self.missing[package] = dtype
+        if is_optional:
+            self.optional += 1
+        else:
+            self.need += 1
+
+    def del_package(self, package):
+        """
+        Remove a package from the self.missing dictionary.
+        Doesn't update missing_pkg nor the need/optional counters.
+        """
+        if package in self.missing:
+            del self.missing[package]
+
+    def clear_deps(self):
+        """
+        Clear dependencies without changing needed/optional.
+
+        This is an awkward way to have a separate section to recommend
+        a package after system main dependencies.
+
+        TODO: rework the logic to prevent needing it.
+        """
+
+        self.missing = {}
+        self.missing_pkg = {}
+
+    def check_missing(self, progs):
+        """
+        Update self.missing_pkg, using progs dict to convert from the
+        agnostic package name to distro-specific one.
+
+        Returns a string with the packages to be installed, sorted and
+        with eventual duplicates removed.
+        """
+
+        self.missing_pkg = {}
+
+        for prog, dtype in sorted(self.missing.items()):
+            # At least on some LTS distros like CentOS 7, texlive doesn't
+            # provide all packages we need. When such distros are
+            # detected, we have to disable PDF output.
+            #
+            # So, we need to ignore the packages that distros would
+            # need for LaTeX to work
+            if DepManager.is_pdf(dtype) and not self.pdf:
+                self.optional -= 1
+                continue
+
+            if dtype not in self.missing_pkg:
+                self.missing_pkg[dtype] = []
+
+            self.missing_pkg[dtype].append(progs.get(prog, prog))
+
+        install = []
+        for dtype, pkgs in self.missing_pkg.items():
+            install += pkgs
+
+        return " ".join(sorted(set(install)))
+
+    def warn_install(self):
+        """
+        Emit warnings/errors related to missing packages.
+        """
+
+        output_msg = ""
+
+        for dtype in sorted(self.missing_pkg.keys()):
+            progs = " ".join(sorted(set(self.missing_pkg[dtype])))
+
+            try:
+                name = DepManager.name(dtype)
+                output_msg += f'{name}:\t{progs}\n'
+            except KeyError:
+                raise KeyError(f"ERROR!!!: invalid dtype for {progs}: {dtype}")
+
+        if output_msg:
+            print(f"\n{output_msg}")
+
+class AncillaryMethods:
+    """
+    Ancillary methods that check for missing dependencies of different
+    types, like binaries, python modules, rpm deps, etc.
+    """
+
+    @staticmethod
+    def which(prog):
+        """
+        Minimal which(): return the full path of the first executable
+        regular file named prog found in PATH, or None when not found.
+        Directories are skipped. Kept instead of importing shutil.
+        """
+        for path in os.environ.get("PATH", "").split(os.pathsep):
+            full_path = os.path.join(path, prog)
+            if os.path.isfile(full_path) and os.access(full_path, os.X_OK):
+                return full_path
+
+        return None
+
+    @staticmethod
+    def run(*args, **kwargs):
+        """
+        Execute a command, hiding its output by default.
+        Preserve compatibility with older Python versions.
+        """
+
+        capture_output = kwargs.pop('capture_output', False)
+
+        if capture_output:
+            if 'stdout' not in kwargs:
+                kwargs['stdout'] = subprocess.PIPE
+            if 'stderr' not in kwargs:
+                kwargs['stderr'] = subprocess.PIPE
+        else:
+            if 'stdout' not in kwargs:
+                kwargs['stdout'] = subprocess.DEVNULL
+            if 'stderr' not in kwargs:
+                kwargs['stderr'] = subprocess.DEVNULL
+
+        # Don't break with older Python versions
+        if 'text' in kwargs and sys.version_info < (3, 7):
+            kwargs['universal_newlines'] = kwargs.pop('text')
+
+        return subprocess.run(*args, **kwargs)
+
+class MissingCheckers(AncillaryMethods):
+    """
+    Contains some ancillary checkers for different types of binaries and
+    package managers.
+    """
+
+    def __init__(self, args, texlive):
+        """
+        Initialize its internal variables
+        """
+        self.pdf = args.pdf
+        self.virtualenv = args.virtualenv
+        self.version_check = args.version_check
+        self.texlive = texlive
+
+        self.min_version = (0, 0, 0)
+        self.cur_version = (0, 0, 0)
+
+        self.deps = DepManager(self.pdf)
+
+        self.need_symlink = 0
+        self.need_sphinx = 0
+
+        self.verbose_warn_install = 1
+
+        self.virtenv_dir = ""
+        self.install = ""
+        self.python_cmd = ""
+
+        self.virtenv_prefix = ["sphinx_", "Sphinx_" ]
+
+    def check_missing_file(self, files, package, dtype):
+        """
+        Does any of the files exist? If not, add package to missing deps.
+        """
+        for f in files:
+            if os.path.exists(f):
+                return
+        self.deps.add_package(package, dtype)
+
+    def check_program(self, prog, dtype):
+        """
+        Does the program exist and is it in the PATH?
+        If not, add it to missing dependencies.
+        """
+        found = self.which(prog)
+        if found:
+            return found
+
+        self.deps.add_package(prog, dtype)
+
+        return None
+
+    def check_perl_module(self, prog, dtype):
+        """
+        Does perl have a dependency? Is it available?
+        If not, add it to missing dependencies.
+
+        Right now, we still need Perl for doc build, as it is required
+        by some tools called at docs or kernel build time, like:
+
+            tools/docs/documentation-file-ref-check
+
+        Also, checkpatch is on Perl.
+        """
+
+        # While testing with lxc download template, one of the
+        # distros (Oracle) didn't have perl - nor even an option to install
+        # before installing oraclelinux-release-el9 package.
+        #
+        # Check it before running an error. If perl is not there,
+        # add it as a mandatory package, as some parts of the doc builder
+        # needs it.
+        if not self.which("perl"):
+            self.deps.add_package("perl", DepManager.SYSTEM_MANDATORY)
+            self.deps.add_package(prog, dtype)
+            return
+
+        try:
+            self.run(["perl", f"-M{prog}", "-e", "1"], check=True)
+        except subprocess.CalledProcessError:
+            self.deps.add_package(prog, dtype)
+
+    def check_python_module(self, module, is_optional=False):
+        """
+        Does a python module exist outside venv? If not, add it to missing
+        dependencies.
+        """
+        if is_optional:
+            dtype = DepManager.PYTHON_OPTIONAL
+        else:
+            dtype = DepManager.PYTHON_MANDATORY
+
+        try:
+            self.run([self.python_cmd, "-c", f"import {module}"], check=True)
+        except subprocess.CalledProcessError:
+            self.deps.add_package(module, dtype)
+
+    def check_rpm_missing(self, pkgs, dtype):
+        """
+        Does an rpm package exist? If not, add it to missing dependencies.
+        """
+        for prog in pkgs:
+            try:
+                self.run(["rpm", "-q", prog], check=True)
+            except subprocess.CalledProcessError:
+                self.deps.add_package(prog, dtype)
+
+    def check_pacman_missing(self, pkgs, dtype):
+        """
+        Does a pacman package exist? If not, add it to missing dependencies.
+        """
+        for prog in pkgs:
+            try:
+                self.run(["pacman", "-Q", prog], check=True)
+            except subprocess.CalledProcessError:
+                self.deps.add_package(prog, dtype)
+
+    def check_missing_tex(self, is_optional=False):
+        """
+        Does a LaTeX package exist? If not, add it to missing dependencies.
+        """
+        if is_optional:
+            dtype = DepManager.PDF_OPTIONAL
+        else:
+            dtype = DepManager.PDF_MANDATORY
+
+        kpsewhich = self.which("kpsewhich")
+        for prog, package in self.texlive.items():
+
+            # If kpsewhich is not there, just add it to deps
+            if not kpsewhich:
+                self.deps.add_package(package, dtype)
+                continue
+
+            # Check if the package is needed
+            try:
+                result = self.run(
+                    [kpsewhich, prog], stdout=subprocess.PIPE, text=True, check=True
+                )
+
+                # Didn't find. Add it
+                if not result.stdout.strip():
+                    self.deps.add_package(package, dtype)
+
+            except subprocess.CalledProcessError:
+                # kpsewhich returned an error. Add it, just in case
+                self.deps.add_package(package, dtype)
+
+    def get_sphinx_fname(self):
+        """
+        Gets the binary filename for sphinx-build.
+        """
+        if "SPHINXBUILD" in os.environ:
+            return os.environ["SPHINXBUILD"]
+
+        fname = "sphinx-build"
+        if self.which(fname):
+            return fname
+
+        fname = "sphinx-build-3"
+        if self.which(fname):
+            self.need_symlink = 1
+            return fname
+
+        return ""
+
+    def get_sphinx_version(self, cmd):
+        """
+        Gets sphinx-build version.
+        """
+        env = os.environ.copy()
+
+        # The sphinx-build tool has a bug: internally, it tries to set
+        # locale with locale.setlocale(locale.LC_ALL, ''). This causes a
+        # crash if language is not set. Detect and fix it.
+        try:
+            locale.setlocale(locale.LC_ALL, '')
+        except Exception:
+            env["LC_ALL"] = "C"
+            env["LANG"] = "C"
+
+        try:
+            result = self.run([cmd, "--version"], env=env,
+                              stdout=subprocess.PIPE,
+                              stderr=subprocess.STDOUT,
+                              text=True, check=True)
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            return None
+
+        for line in result.stdout.split("\n"):
+            match = re.match(r"^sphinx-build\s+([\d\.]+)(?:\+(?:/[\da-f]+)|b\d+)?\s*$", line)
+            if match:
+                return PythonVersion.parse_version(match.group(1))
+
+            match = re.match(r"^Sphinx.*\s+([\d\.]+)\s*$", line)
+            if match:
+                return PythonVersion.parse_version(match.group(1))
+
+    def check_sphinx(self, conf):
+        """
+        Checks Sphinx minimal requirements
+        """
+        try:
+            with open(conf, "r", encoding="utf-8") as f:
+                for line in f:
+                    match = re.match(r"^\s*needs_sphinx\s*=\s*[\'\"]([\d\.]+)[\'\"]", line)
+                    if match:
+                        self.min_version = PythonVersion.parse_version(match.group(1))
+                        break
+        except IOError:
+            sys.exit(f"Can't open {conf}")
+        # min_version starts as (0, 0, 0), a truthy tuple: compare explicitly
+        if self.min_version == (0, 0, 0):
+            sys.exit(f"Can't get needs_sphinx version from {conf}")
+
+        self.virtenv_dir = self.virtenv_prefix[0] + "latest"
+
+        sphinx = self.get_sphinx_fname()
+        if not sphinx:
+            self.need_sphinx = 1
+            return
+
+        self.cur_version = self.get_sphinx_version(sphinx)
+        if not self.cur_version:
+            sys.exit(f"{sphinx} didn't return its version")
+
+        if self.cur_version < self.min_version:
+            curver = PythonVersion.ver_str(self.cur_version)
+            minver = PythonVersion.ver_str(self.min_version)
+
+            print(f"ERROR: Sphinx version is {curver}. It should be >= {minver}")
+            self.need_sphinx = 1
+            return
+
+        # On version check mode, just assume Sphinx has all mandatory deps
+        if self.version_check and self.cur_version >= RECOMMENDED_VERSION:
+            sys.exit(0)
+
+    def catcheck(self, filename):
+        """
+        Reads a file if it exists, returning as string.
+        If not found, returns an empty string.
+        """
+        if os.path.exists(filename):
+            with open(filename, "r", encoding="utf-8") as f:
+                return f.read().strip()
+        return ""
+
+    def get_system_release(self):
+        """
+        Determine the system type. There's no unique way that would work
+        with all distros with a minimal package install. So, several
+        methods are used here.
+
+        By default, it will use lsb_release function. If not available, it will
+        fall back to reading the known different places where the distro name
+        is stored.
+
+        Several modern distros now have /etc/os-release, which usually have
+        a decent coverage.
+        """
+
+        system_release = ""
+
+        if self.which("lsb_release"):
+            result = self.run(["lsb_release", "-d"], capture_output=True, text=True)
+            system_release = result.stdout.replace("Description:", "").strip()
+
+        release_files = [
+            "/etc/system-release",
+            "/etc/redhat-release",
+            "/etc/lsb-release",
+            "/etc/gentoo-release",
+        ]
+
+        if not system_release:
+            for f in release_files:
+                system_release = self.catcheck(f)
+                if system_release:
+                    break
+
+        # This seems more common than LSB these days
+        if not system_release:
+            os_var = {}
+            try:
+                with open("/etc/os-release", "r", encoding="utf-8") as f:
+                    for line in f:
+                        match = re.match(r"^([\w\d\_]+)=\"?([^\"]*)\"?\n", line)
+                        if match:
+                            os_var[match.group(1)] = match.group(2)
+
+                system_release = os_var.get("NAME", "")
+                if "VERSION_ID" in os_var:
+                    system_release += " " + os_var["VERSION_ID"]
+                elif "VERSION" in os_var:
+                    system_release += " " + os_var["VERSION"]
+            except IOError:
+                pass
+
+        if not system_release:
+            system_release = self.catcheck("/etc/issue")
+
+        system_release = system_release.strip()
+
+        return system_release
+
+class SphinxDependencyChecker(MissingCheckers):
+    """
+    Main class for checking Sphinx documentation build dependencies.
+
+    - Check for missing system packages;
+    - Check for missing Python modules;
+    - Check for missing LaTeX packages needed by PDF generation;
+    - Propose Sphinx install via Python Virtual environment;
+    - Propose Sphinx install via distro-specific package install.
+    """
+    def __init__(self, args):
+        """Set up the texlive file-to-package table, state flags and Documentation paths"""
+
+        # Maps each required LaTeX file to the Fedora/OpenSuse texlive package name
+        texlive = {
+            "amsfonts.sty":       "texlive-amsfonts",
+            "amsmath.sty":        "texlive-amsmath",
+            "amssymb.sty":        "texlive-amsfonts",
+            "amsthm.sty":         "texlive-amscls",
+            "anyfontsize.sty":    "texlive-anyfontsize",
+            "atbegshi.sty":       "texlive-oberdiek",
+            "bm.sty":             "texlive-tools",
+            "capt-of.sty":        "texlive-capt-of",
+            "cmap.sty":           "texlive-cmap",
+            "ctexhook.sty":       "texlive-ctex",
+            "ecrm1000.tfm":       "texlive-ec",
+            "eqparbox.sty":       "texlive-eqparbox",
+            "eu1enc.def":         "texlive-euenc",
+            "fancybox.sty":       "texlive-fancybox",
+            "fancyvrb.sty":       "texlive-fancyvrb",
+            "float.sty":          "texlive-float",
+            "fncychap.sty":       "texlive-fncychap",
+            "footnote.sty":       "texlive-mdwtools",
+            "framed.sty":         "texlive-framed",
+            "luatex85.sty":       "texlive-luatex85",
+            "multirow.sty":       "texlive-multirow",
+            "needspace.sty":      "texlive-needspace",
+            "palatino.sty":       "texlive-psnfss",
+            "parskip.sty":        "texlive-parskip",
+            "polyglossia.sty":    "texlive-polyglossia",
+            "tabulary.sty":       "texlive-tabulary",
+            "threeparttable.sty": "texlive-threeparttable",
+            "titlesec.sty":       "texlive-titlesec",
+            "ucs.sty":            "texlive-ucs",
+            "upquote.sty":        "texlive-upquote",
+            "wrapfig.sty":        "texlive-wrapfig",
+        }
+
+        super().__init__(args, texlive)
+
+        self.need_pip = False
+        self.rec_sphinx_upgrade = 0
+
+        self.system_release = self.get_system_release()
+        self.activate_cmd = ""
+
+        # Some distros may not have a Sphinx shipped package compatible with
+        # our minimal requirements
+        self.package_supported = True
+
+        # Recommend a new python version
+        self.recommend_python = None
+
+        # Certain hints are meant to be shown only once
+        self.distro_msg = None
+
+        self.latest_avail_ver = (0, 0, 0)
+        self.venv_ver = (0, 0, 0)
+
+        # Paths are relative to $srctree when set, else to the current directory
+        prefix = os.environ.get("srctree", ".") + "/"
+        self.conf = prefix + "Documentation/conf.py"
+        self.requirement_file = prefix + "Documentation/sphinx/requirements.txt"
+
+    def get_install_progs(self, progs, cmd, extra=None):
+        """
+        Translate the missing dependencies into distro package names (via
+        the progs mapping) and build the install hint to be displayed.
+
+        Returns the hint text, or None when nothing is missing or when no
+        cmd was given. Warnings are printed first if verbose_warn_install.
+        """
+        packages = self.deps.check_missing(progs)
+
+        # Report what is missing before deciding whether a hint is needed
+        if self.verbose_warn_install:
+            self.deps.warn_install()
+
+        # Nothing to install, or no install command was provided
+        if not (packages and cmd):
+            return None
+
+        hint = "You should run:" if self.verbose_warn_install else ""
+
+        # Extra commands go first, each on its own tab-indented line
+        if extra:
+            hint += "\n\t" + extra.replace("\n", "\n\t")
+
+        hint += "\n\tsudo " + cmd + " " + packages
+        return hint
+
+    #
+    # Distro-specific hints methods
+    #
+
+    def give_debian_hints(self):
+        """
+        Check Debian-specific PDF deps and return the apt-get install hint.
+        """
+        progs = {
+            "Pod::Usage":    "perl-modules",
+            "convert":       "imagemagick",
+            "dot":           "graphviz",
+            "ensurepip":     "python3-venv",
+            "python-sphinx": "python3-sphinx",
+            "rsvg-convert":  "librsvg2-bin",
+            "virtualenv":    "virtualenv",
+            "xelatex":       "texlive-xetex",
+            "yaml":          "python3-yaml",
+        }
+
+        if self.pdf:
+            pdf_pkgs = {
+                "fonts-dejavu": [
+                    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
+                ],
+                "fonts-noto-cjk": [
+                    "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
+                    "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
+                    "/usr/share/fonts/opentype/noto/NotoSerifCJK-Regular.ttc",
+                ],
+                "tex-gyre": [
+                    "/usr/share/texmf/tex/latex/tex-gyre/tgtermes.sty"
+                ],
+                "texlive-fonts-recommended": [
+                    "/usr/share/texlive/texmf-dist/fonts/tfm/adobe/zapfding/pzdr.tfm",
+                ],
+                "texlive-lang-chinese": [
+                    "/usr/share/texlive/texmf-dist/tex/latex/ctex/ctexhook.sty",
+                ],
+            }
+
+            for package, files in pdf_pkgs.items():
+                self.check_missing_file(files, package, DepManager.PDF_MANDATORY)
+
+            self.check_program("dvipng", DepManager.PDF_MANDATORY)
+
+        if not self.distro_msg:
+            self.distro_msg = \
+                "Note: ImageMagick is broken on some distros, affecting PDF output. For more details:\n" \
+                "\thttps://askubuntu.com/questions/1158894/imagemagick-still-broken-using-with-usr-bin-convert"
+
+        return self.get_install_progs(progs, "apt-get install")
+
+    def give_redhat_hints(self):
+        """
+        Provide "dnf install" package hints for RedHat-based distros
+        (Fedora, RHEL and RHEL-based variants).
+        """
+        progs = {
+            "Pod::Usage":       "perl-Pod-Usage",
+            "convert":          "ImageMagick",
+            "dot":              "graphviz",
+            "python-sphinx":    "python3-sphinx",
+            "rsvg-convert":     "librsvg2-tools",
+            "virtualenv":       "python3-virtualenv",
+            "xelatex":          "texlive-xetex-bin",
+            "yaml":             "python3-pyyaml",
+        }
+
+        fedora_tex_pkgs = [
+            "dejavu-sans-fonts",
+            "dejavu-sans-mono-fonts",
+            "dejavu-serif-fonts",
+            "texlive-collection-fontsrecommended",
+            "texlive-collection-latex",
+            "texlive-xecjk",
+        ]
+
+        fedora = False
+        rel = None
+        # Release number: first integer after "release" or "Linux"
+        match = re.search(r"(release|Linux)\s+(\d+)", self.system_release)
+        if match:
+            rel = int(match.group(2))
+
+        if not rel:
+            print("Couldn't identify release number")
+            noto_sans_redhat = None
+            self.pdf = False
+        elif re.search("Fedora", self.system_release):
+            # Fedora 38 and upper use this CJK font
+
+            noto_sans_redhat = "google-noto-sans-cjk-fonts"
+            fedora = True
+        else:
+            # Almalinux, CentOS, RHEL, ...
+
+            # at least up to version 9 (and Fedora < 38), that's the CJK font
+            noto_sans_redhat = "google-noto-sans-cjk-ttc-fonts"
+
+            progs["virtualenv"] = "python-virtualenv"
+
+            if not rel or rel < 8:
+                print("ERROR: Distro not supported. Too old?")
+                return
+
+            # RHEL 8 uses Python 3.6, which is not compatible with
+            # the build system anymore. Suggest Python 3.9
+            if rel == 8:
+                self.check_program("python3.9", DepManager.SYSTEM_MANDATORY)
+                progs["python3.9"] = "python39"
+                progs["yaml"] = "python39-pyyaml"
+
+                self.recommend_python = True
+
+                # There's no python39-sphinx package. Only pip is supported
+                self.package_supported = False
+
+            if not self.distro_msg:
+                self.distro_msg = \
+                    "Note: RHEL-based distros typically require extra repositories.\n" \
+                    "For most, enabling epel and crb are enough:\n" \
+                    "\tsudo dnf install -y epel-release\n" \
+                    "\tsudo dnf config-manager --set-enabled crb\n" \
+                    "Yet, some may have other required repositories. Those commands could be useful:\n" \
+                    "\tsudo dnf repolist all\n" \
+                    "\tsudo dnf repoquery --available --info <pkgs>\n" \
+                    "\tsudo dnf config-manager --set-enabled '*' # enable all - probably not what you want"
+
+        if self.pdf:
+            pdf_pkgs = [
+                "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+                "/usr/share/fonts/google-noto-sans-cjk-fonts/NotoSansCJK-Regular.ttc",
+            ]
+
+            self.check_missing_file(pdf_pkgs, noto_sans_redhat, DepManager.PDF_MANDATORY)
+
+            self.check_rpm_missing(fedora_tex_pkgs, DepManager.PDF_MANDATORY)
+
+            self.check_missing_tex(DepManager.PDF_MANDATORY)
+
+            # There's no texlive-ctex on RHEL 8 repositories. This will
+            # likely affect CJK pdf build only.
+            if not fedora and rel == 8:
+                self.deps.del_package("texlive-ctex")
+
+        return self.get_install_progs(progs, "dnf install")
+
+    def give_opensuse_hints(self):
+        """
+        Provide "zypper install" package hints for openSUSE-based distros
+        (Leap and Tumbleweed).
+        """
+        progs = {
+            "Pod::Usage":    "perl-Pod-Usage",
+            "convert":       "ImageMagick",
+            "dot":           "graphviz",
+            "python-sphinx": "python3-sphinx",
+            "virtualenv":    "python3-virtualenv",
+            "xelatex":       "texlive-xetex-bin texlive-dejavu",
+            "yaml":          "python3-pyyaml",
+        }
+
+        suse_tex_pkgs = [
+            "texlive-babel-english",
+            "texlive-caption",
+            "texlive-colortbl",
+            "texlive-courier",
+            "texlive-dvips",
+            "texlive-helvetic",
+            "texlive-makeindex",
+            "texlive-metafont",
+            "texlive-metapost",
+            "texlive-palatino",
+            "texlive-preview",
+            "texlive-times",
+            "texlive-zapfchan",
+            "texlive-zapfding",
+        ]
+
+        progs["latexmk"] = "texlive-latexmk-bin"
+
+        match = re.search(r"(Leap)\s+(\d+)\.(\d)", self.system_release)
+        if match:
+            rel = int(match.group(2))
+
+            # Leap 15.x uses Python 3.6, which is not compatible with
+            # the build system anymore. Suggest Python 3.11
+            if rel == 15:
+                if not self.which(self.python_cmd):
+                    self.check_program("python3.11", DepManager.SYSTEM_MANDATORY)
+                    progs["python3.11"] = "python311"
+                    self.recommend_python = True
+
+                progs.update({
+                    "python-sphinx": "python311-Sphinx python311-Sphinx-latex",
+                    "virtualenv":    "python311-virtualenv",
+                    "yaml":          "python311-PyYAML",
+                })
+        else:
+            # Not a Leap release (Tumbleweed): use the Python 3.13 packages
+
+            progs.update({
+                "python-sphinx": "python313-Sphinx python313-Sphinx-latex",
+                "virtualenv":    "python313-virtualenv",
+                "yaml":          "python313-PyYAML",
+            })
+
+        # FIXME: add support for installing CJK fonts
+        #
+        # I tried hard, but was unable to find a way to install
+        # "Noto Sans CJK SC" on openSUSE
+
+        if self.pdf:
+            # Both LaTeX related checks are only done when self.pdf is set
+            self.check_rpm_missing(suse_tex_pkgs, DepManager.PDF_MANDATORY)
+            self.check_missing_tex()
+
+        return self.get_install_progs(progs, "zypper install --no-recommends")
+
+    def give_mageia_hints(self):
+        """
+        Provide urpmi (Mageia) or dnf (OpenMandriva) package install hints.
+        """
+        progs = {
+            "Pod::Usage":    "perl-Pod-Usage",
+            "convert":       "ImageMagick",
+            "dot":           "graphviz",
+            "python-sphinx": "python3-sphinx",
+            "rsvg-convert":  "librsvg2",
+            "virtualenv":    "python3-virtualenv",
+            "xelatex":       "texlive",
+            "yaml":          "python3-yaml",
+        }
+
+        tex_pkgs = [
+            "texlive-fontsextra",
+            "texlive-fonts-asian",
+            "fonts-ttf-dejavu",
+        ]
+        # OpenMandriva overrides the installer, CJK font and TeX package names
+        if re.search(r"OpenMandriva", self.system_release):
+            packager_cmd = "dnf install"
+            noto_sans = "noto-sans-cjk-fonts"
+            tex_pkgs = [
+                "texlive-collection-basic",
+                "texlive-collection-langcjk",
+                "texlive-collection-fontsextra",
+                "texlive-collection-fontsrecommended"
+            ]
+
+            # Tested on OpenMandriva Lx 4.3
+            progs["convert"] = "imagemagick"
+            progs["yaml"] = "python-pyyaml"
+            progs["python-virtualenv"] = "python-virtualenv"
+            progs["python-sphinx"] = "python-sphinx"
+            progs["xelatex"] = "texlive"
+
+            self.check_program("python-virtualenv", DepManager.PYTHON_MANDATORY)
+
+            # On my tests with openMandriva LX 4.0 docker image, upgraded
+            # to 4.3, python-virtualenv package is broken: it is missing
+            # ensurepip. Without it, the alternative would be to run:
+            # python3 -m venv --without-pip ~/sphinx_latest, but running
+            # pip there won't install sphinx at venv.
+            #
+            # Add a note about that.
+
+            if not self.distro_msg:
+                self.distro_msg = \
+                    "Notes:\n"\
+                    "1. for venv, ensurepip could be broken, preventing its install method.\n" \
+                    "2. at least on OpenMandriva LX 4.3, texlive packages seem broken"
+
+        else:
+            packager_cmd = "urpmi"
+            noto_sans = "google-noto-sans-cjk-ttc-fonts"
+
+        progs["latexmk"] = "texlive-collection-basic"
+
+        if self.pdf:
+            pdf_pkgs = [
+                "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+                "/usr/share/fonts/TTF/NotoSans-Regular.ttf",
+            ]
+
+            self.check_missing_file(pdf_pkgs, noto_sans, DepManager.PDF_MANDATORY)
+            self.check_rpm_missing(tex_pkgs, DepManager.PDF_MANDATORY)
+
+        return self.get_install_progs(progs, packager_cmd)
+
+    def give_arch_linux_hints(self):
+        """
+        Provide "pacman -S" package installation hints for ArchLinux.
+        """
+        progs = {
+            "convert":      "imagemagick",
+            "dot":          "graphviz",
+            "latexmk":      "texlive-core",
+            "rsvg-convert": "extra/librsvg",
+            "virtualenv":   "python-virtualenv",
+            "xelatex":      "texlive-xetex",
+            "yaml":         "python-yaml",
+        }
+
+        archlinux_tex_pkgs = [
+            "texlive-basic",
+            "texlive-binextra",
+            "texlive-core",
+            "texlive-fontsrecommended",
+            "texlive-langchinese",
+            "texlive-langcjk",
+            "texlive-latexextra",
+            "ttf-dejavu",
+        ]
+
+        if self.pdf:
+            self.check_pacman_missing(archlinux_tex_pkgs,
+                                      DepManager.PDF_MANDATORY)
+
+            self.check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc"],
+                                    "noto-fonts-cjk",
+                                    DepManager.PDF_MANDATORY)
+
+        # Hand the mapping over; the result is None when nothing is missing
+        return self.get_install_progs(progs, "pacman -S")
+
+    def give_gentoo_hints(self):
+        """
+        Provide emerge hints for Gentoo, plus package.use setup commands.
+        """
+        texlive_deps = [
+            "dev-texlive/texlive-fontsrecommended",
+            "dev-texlive/texlive-latexextra",
+            "dev-texlive/texlive-xetex",
+            "media-fonts/dejavu",
+        ]
+
+        progs = {
+            "convert":       "media-gfx/imagemagick",
+            "dot":           "media-gfx/graphviz",
+            "rsvg-convert":  "gnome-base/librsvg",
+            "virtualenv":    "dev-python/virtualenv",
+            "xelatex":       " ".join(texlive_deps),
+            "yaml":          "dev-python/pyyaml",
+            "python-sphinx": "dev-python/sphinx",
+        }
+
+        if self.pdf:
+            pdf_pkgs = {
+                "media-fonts/dejavu": [
+                    "/usr/share/fonts/dejavu/DejaVuSans.ttf",
+                ],
+                "media-fonts/noto-cjk": [
+                    "/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf",
+                    "/usr/share/fonts/noto-cjk/NotoSerifCJK-Regular.ttc",
+                ],
+            }
+            for package, files in pdf_pkgs.items():
+                self.check_missing_file(files, package, DepManager.PDF_MANDATORY)
+
+        # Handling dependencies is a nightmare, as Gentoo refuses to emerge
+        # some packages if there's no package.use file describing them.
+        # To make it worse, compilation flags shall also be present there
+        # for some packages. If USE is not perfect, error/warning messages
+        #   like those are shown:
+        #
+        #   !!! The following binary packages have been ignored due to non matching USE:
+        #
+        #    =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_13 qt6 svg
+        #    =media-gfx/graphviz-12.2.1-r1 X pdf python_single_target_python3_12 -python_single_target_python3_13 qt6 svg
+        #    =media-gfx/graphviz-12.2.1-r1 X pdf qt6 svg
+        #    =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 qt6 svg
+        #    =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 python_single_target_python3_12 -python_single_target_python3_13 qt6 svg
+        #    =media-fonts/noto-cjk-20190416 X
+        #    =app-text/texlive-core-2024-r1 X cjk -xetex
+        #    =app-text/texlive-core-2024-r1 X -xetex
+        #    =app-text/texlive-core-2024-r1 -xetex
+        #    =dev-libs/zziplib-0.13.79-r1 sdl
+        #
+        # And will ignore such packages, installing the remaining ones. That
+        # affects mostly the image extension and PDF generation.
+
+        # Package dependencies and the minimal needed args:
+        portages = {
+            "graphviz": "media-gfx/graphviz",
+            "imagemagick": "media-gfx/imagemagick",
+            "media-libs": "media-libs/harfbuzz icu",
+            "media-fonts": "media-fonts/noto-cjk",
+            "texlive": "app-text/texlive-core xetex",
+            "zziblib": "dev-libs/zziplib sdl",
+        }
+
+        extra_cmds = ""
+        if not self.distro_msg:
+            self.distro_msg = "Note: Gentoo requires package.use to be adjusted before emerging packages"
+
+            use_base = "/etc/portage/package.use"
+            files = glob(f"{use_base}/*")
+
+            for fname, portage in portages.items():
+                install = False
+
+                while install is False:
+                    if not files:
+                        # No files under package.use. Install all
+                        install = True
+                        break
+
+                    # First word is the package name, the rest are USE flags
+                    name, *args = portage.split(" ")
+                    pattern = rf"^{name}\b"
+
+                    cmd = ["grep", "-l", "-E", pattern] + files
+                    result = self.run(cmd, stdout=subprocess.PIPE, text=True)
+                    if result.returncode or not result.stdout.strip():
+                        # File containing portage name not found
+                        install = True
+                        break
+
+                    # Ensure that needed USE flags are present. Only lines
+                    # for this portage count; "-flag" args are never required
+                    needed = {arg for arg in args if not arg.startswith("-")}
+                    found = set()
+                    if needed:
+                        # grep -l prints one matching file name per line
+                        for match_fname in result.stdout.splitlines():
+                            with open(match_fname, 'r', encoding='utf8',
+                                      errors='backslashreplace') as fp:
+                                for line in fp:
+                                    if re.search(pattern, line):
+                                        found.update(line.split()[1:])
+
+                    # Request the setup when a needed flag is absent;
+                    # otherwise everything looks ok, don't install
+                    install = not needed.issubset(found)
+                    break
+
+                # emit a code to setup missing USE
+                if install:
+                    extra_cmds += (f"sudo su -c 'echo \"{portage}\" > {use_base}/{fname}'\n")
+
+        # Now, we can use emerge and let it respect USE
+        return self.get_install_progs(progs,
+                                      "emerge --ask --changed-use --binpkg-respect-use=y",
+                                      extra_cmds)
+
+    def get_install(self):
+        """
+        OS-specific hints logic. Seeks for a hinter. If found, use it to
+        provide package-manager specific install commands.
+
+        Otherwise, checks the meta-packages and sets an unknown-distro note.
+
+        Returns a string with the command to be executed to install the
+        needed packages, as built by the distro hinter. For an unknown
+        distro no command is passed to get_install_progs(): None is returned.
+        """
+        os_hints = {
+            re.compile("Red Hat Enterprise Linux"):   self.give_redhat_hints,
+            re.compile("Fedora"):                     self.give_redhat_hints,
+            re.compile("AlmaLinux"):                  self.give_redhat_hints,
+            re.compile("Amazon Linux"):               self.give_redhat_hints,
+            re.compile("CentOS"):                     self.give_redhat_hints,
+            re.compile("openEuler"):                  self.give_redhat_hints,
+            re.compile("Oracle Linux Server"):        self.give_redhat_hints,
+            re.compile("Rocky Linux"):                self.give_redhat_hints,
+            re.compile("Springdale Open Enterprise"): self.give_redhat_hints,
+
+            re.compile("Ubuntu"):                     self.give_debian_hints,
+            re.compile("Debian"):                     self.give_debian_hints,
+            re.compile("Devuan"):                     self.give_debian_hints,
+            re.compile("Kali"):                       self.give_debian_hints,
+            re.compile("Mint"):                       self.give_debian_hints,
+
+            re.compile("openSUSE"):                   self.give_opensuse_hints,
+
+            re.compile("Mageia"):                     self.give_mageia_hints,
+            re.compile("OpenMandriva"):               self.give_mageia_hints,
+
+            re.compile("Arch Linux"):                 self.give_arch_linux_hints,
+            re.compile("Gentoo"):                     self.give_gentoo_hints,
+        }
+
+        # If the OS is detected, use per-OS hint logic
+        for regex, os_hint in os_hints.items():
+            if regex.search(self.system_release):
+                return os_hint()
+
+        #
+        # Fall-back to generic hint code for other distros
+        # That's far from ideal, specially for LaTeX dependencies.
+        #
+        progs = {"sphinx-build": "sphinx"}
+        if self.pdf:
+            self.check_missing_tex()
+
+        self.distro_msg = \
+            f"I don't know distro {self.system_release}.\n" \
+            "So, I can't provide you a hint with the install procedure.\n" \
+            "There are likely missing dependencies."
+
+        return self.get_install_progs(progs, None)
+
+    #
+    # Common dependencies
+    #
+    def deactivate_help(self):
+        """
+        Tell the user how to leave an active virtual environment.
+        """
+        # Both lines of the hint are emitted by a single call
+        print("\n    If you want to exit the virtualenv, you can use:",
+              "\tdeactivate", sep="\n")
+
+    def get_virtenv(self):
+        """
+        Give a hint about how to activate an already-existing virtual
+        environment containing sphinx-build.
+
+        Returns a tuple (activate_cmd_path, sphinx_version) for the first
+        accepted virtual env; the path is an empty string if none is.
+        """
+        cwd = os.getcwd()
+
+        activates = []
+
+        # Add all sphinx prefixes with possible version numbers
+        for p in self.virtenv_prefix:
+            activates += glob(f"{cwd}/{p}[0-9]*/bin/activate")
+
+        activates.sort(reverse=True, key=str.lower)
+
+        # Place sphinx_latest first, if it exists
+        for p in self.virtenv_prefix:
+            activates = glob(f"{cwd}/{p}*latest/bin/activate") + activates
+
+        ver = (0, 0, 0)
+        for f in activates:
+            # Discard too old Sphinx virtual environments
+            match = re.search(r"(\d+)\.(\d+)\.(\d+)", f)
+            if match:
+                ver = (int(match.group(1)), int(match.group(2)), int(match.group(3)))
+
+                if ver < self.min_version:
+                    continue
+
+            sphinx_cmd = os.path.join(os.path.dirname(f), "sphinx-build")
+            if not os.path.isfile(sphinx_cmd):
+                continue
+
+            ver = self.get_sphinx_version(sphinx_cmd)
+
+            if not ver:
+                venv_dir = os.path.dirname(os.path.dirname(f))
+                print(f"Warning: virtual environment {venv_dir} is not working.\n" \
+                      "Python version upgrade? Remove it with:\n\n" \
+                      f"\trm -rf {venv_dir}\n\n")
+            else:
+                if self.need_sphinx and ver >= self.min_version:
+                    return (f, ver)
+                # NOTE(review): ver is compared as a tuple above - confirm parse_version() accepts it
+                elif PythonVersion.parse_version(ver) > self.cur_version:
+                    return (f, ver)
+
+        return ("", ver)
+
+    def recommend_sphinx_upgrade(self):
+        """
+        Check if Sphinx needs to be upgraded.
+
+        Returns None when the current version already reaches
+        RECOMMENDED_VERSION. Otherwise, returns the highest version found
+        (venv or current one), which is (0, 0, 0) when there is none.
+        """
+
+        # Avoid running sphinx-builds from venv if cur_version is good
+        if self.cur_version and self.cur_version >= RECOMMENDED_VERSION:
+            self.latest_avail_ver = self.cur_version
+            return None
+
+        # Get the highest version from sphinx_*/bin/sphinx-build and the
+        # corresponding command to activate the venv/virtenv
+        self.activate_cmd, self.venv_ver = self.get_virtenv()
+
+        # Store the highest version from Sphinx existing virtualenvs
+        if self.activate_cmd and self.venv_ver > self.cur_version:
+            self.latest_avail_ver = self.venv_ver
+        else:
+            if self.cur_version:
+                self.latest_avail_ver = self.cur_version
+            else:
+                self.latest_avail_ver = (0, 0, 0)
+
+        # As we don't know package version of Sphinx, and there's no
+        # virtual environments, don't check if upgrades are needed
+        if not self.virtualenv:
+            if not self.latest_avail_ver:
+                return None
+
+            return self.latest_avail_ver
+
+        # Either there are already a virtual env or a new one should be created
+        self.need_pip = True
+
+        if not self.latest_avail_ver:
+            return None
+
+        # Return if the reason is due to an upgrade or not
+        if self.latest_avail_ver != (0, 0, 0):
+            if self.latest_avail_ver < RECOMMENDED_VERSION:
+                self.rec_sphinx_upgrade = 1
+
+        return self.latest_avail_ver
+
+    def recommend_package(self):
+        """
+        Recommend installing Sphinx as a distro-specific package.
+        """
+
+        print("\n2) As a package with:")
+
+        old_need = self.deps.need
+        old_optional = self.deps.optional
+        # self.pdf is switched off here and, unlike the others, not restored
+        self.pdf = False
+        self.deps.optional = 0
+        old_verbose = self.verbose_warn_install
+        self.verbose_warn_install = 0
+
+        self.deps.clear_deps()
+
+        self.deps.add_package("python-sphinx", DepManager.PYTHON_MANDATORY)
+
+        cmd = self.get_install()
+        if cmd:
+            print(cmd)
+
+        self.deps.need = old_need
+        self.deps.optional = old_optional
+        self.verbose_warn_install = old_verbose
+
+    def recommend_sphinx_version(self, virtualenv_cmd):
+        """
+        Provide recommendations for installing or upgrading Sphinx based
+        on current version.
+
+        The logic here is complex, as it has to deal with different versions:
+
+        - minimal supported version;
+        - minimal PDF version;
+        - recommended version.
+
+        It also needs to work fine with both distro's package and venv.
+        virtualenv_cmd is the venv creation command, or None if unavailable.
+        """
+
+        if self.recommend_python:
+            cur_ver = sys.version_info[:3]
+            if cur_ver < MIN_PYTHON_VERSION:
+                print(f"\nPython version {PythonVersion.ver_str(cur_ver)} is incompatible with doc build.\n" \
+                    "Please upgrade it and re-run.\n")
+                return
+
+        # Version is OK. Nothing to do.
+        if self.cur_version != (0, 0, 0) and self.cur_version >= RECOMMENDED_VERSION:
+            return
+
+        if self.latest_avail_ver:
+            latest_avail_ver = PythonVersion.ver_str(self.latest_avail_ver)
+
+        if not self.need_sphinx:
+            # sphinx-build is present and its version is >= $min_version
+
+            # only recommend enabling a newer virtenv version if makes sense.
+            if self.latest_avail_ver and self.latest_avail_ver > self.cur_version:
+                print(f"\nYou may also use the newer Sphinx version {latest_avail_ver} with:")
+                if any(p in os.getcwd() for p in self.virtenv_prefix):
+                    print("\tdeactivate")
+                print(f"\t. {self.activate_cmd}")
+                self.deactivate_help()
+                return
+
+            if self.latest_avail_ver and self.latest_avail_ver >= RECOMMENDED_VERSION:
+                return
+
+        if not self.virtualenv:
+            # No sphinx either via package or via virtenv. As we can't
+            # Compare the versions here, just return, recommending the
+            # user to install it from the package distro.
+            if not self.latest_avail_ver or self.latest_avail_ver == (0, 0, 0):
+                return
+
+            # User doesn't want a virtenv recommendation, but he already
+            # installed one via virtenv with a newer version.
+            # So, print commands to enable it
+            if self.latest_avail_ver > self.cur_version:
+                print(f"\nYou may also use the Sphinx virtualenv version {latest_avail_ver} with:")
+                if any(p in os.getcwd() for p in self.virtenv_prefix):
+                    print("\tdeactivate")
+                print(f"\t. {self.activate_cmd}")
+                self.deactivate_help()
+                return
+            print("\n")
+        else:
+            if self.need_sphinx:
+                self.deps.need += 1
+
+        # Suggest newer versions if current ones are too old
+        if self.latest_avail_ver and self.latest_avail_ver >= self.min_version:
+            if self.latest_avail_ver >= RECOMMENDED_VERSION:
+                print(f"\nNeed to activate Sphinx (version {latest_avail_ver}) on virtualenv with:")
+                print(f"\t. {self.activate_cmd}")
+                self.deactivate_help()
+                return
+
+            # Version is above the minimal required one, but may be
+            # below the recommended one. So, print warnings/notes
+            if self.latest_avail_ver < RECOMMENDED_VERSION:
+                print(f"Warning: It is recommended at least Sphinx version {PythonVersion.ver_str(RECOMMENDED_VERSION)}.")
+
+        # At this point, either it needs Sphinx or upgrade is recommended,
+        # both via pip
+
+        if self.rec_sphinx_upgrade:
+            if not self.virtualenv:
+                print("Instead of install/upgrade Python Sphinx pkg, you could use pip/pypi with:\n\n")
+            else:
+                print("To upgrade Sphinx, use:\n\n")
+        else:
+            print("\nSphinx needs to be installed either:\n1) via pip/pypi with:\n")
+
+        if not virtualenv_cmd:
+            print("   Currently not possible.\n")
+            print("   Please upgrade Python to a newer version and run this script again")
+        else:
+            print(f"\t{virtualenv_cmd} {self.virtenv_dir}")
+            print(f"\t. {self.virtenv_dir}/bin/activate")
+            print(f"\tpip install -r {self.requirement_file}")
+            self.deactivate_help()
+
+        if self.package_supported:
+            self.recommend_package()
+
+        print("\n" \
+              "   Please note that Sphinx currentlys produce false-positive\n" \
+              "   warnings when the same name is used for more than one type (functions,\n" \
+              "   structs, enums,...). This is known Sphinx bug. For more details, see:\n" \
+              "\thttps://github.com/sphinx-doc/sphinx/pull/8313")
+
+    def check_needs(self):
+        """
+        Main method that checks needed dependencies and provides
+        recommendations. Exits via sys.exit() if mandatory ones are missing.
+        """
+        self.python_cmd = sys.executable
+
+        # Check if Sphinx is already accessible from current environment
+        self.check_sphinx(self.conf)
+
+        if self.system_release:
+            print(f"Detected OS: {self.system_release}.")
+        else:
+            print("Unknown OS")
+        if self.cur_version != (0, 0, 0):
+            ver = PythonVersion.ver_str(self.cur_version)
+            print(f"Sphinx version: {ver}\n")
+
+        # Check the type of virtual env, depending on Python version
+        virtualenv_cmd = None
+
+        if sys.version_info < MIN_PYTHON_VERSION:
+            min_ver = PythonVersion.ver_str(MIN_PYTHON_VERSION)
+            print(f"ERROR: at least python {min_ver} is required to build the kernel docs")
+            self.need_sphinx = 1
+
+        self.venv_ver = self.recommend_sphinx_upgrade()
+
+        if self.need_pip:
+            if sys.version_info < MIN_PYTHON_VERSION:
+                self.need_pip = False
+                print("Warning: python version is not supported.")
+            else:
+                virtualenv_cmd = f"{self.python_cmd} -m venv"
+                self.check_python_module("ensurepip")
+
+        # Check for needed programs/tools
+        self.check_perl_module("Pod::Usage", DepManager.SYSTEM_MANDATORY)
+
+        self.check_program("make", DepManager.SYSTEM_MANDATORY)
+        self.check_program("which", DepManager.SYSTEM_MANDATORY)
+
+        self.check_program("dot", DepManager.SYSTEM_OPTIONAL)
+        self.check_program("convert", DepManager.SYSTEM_OPTIONAL)
+
+        self.check_python_module("yaml")
+
+        if self.pdf:
+            self.check_program("xelatex", DepManager.PDF_MANDATORY)
+            self.check_program("rsvg-convert", DepManager.PDF_MANDATORY)
+            self.check_program("latexmk", DepManager.PDF_MANDATORY)
+
+        # Do distro-specific checks and output distro-install commands
+        cmd = self.get_install()
+        if cmd:
+            print(cmd)
+
+        # If distro requires some special instructions, print here.
+        # Please notice that get_install() needs to be called first.
+        if self.distro_msg:
+            print("\n" + self.distro_msg)
+
+        if not self.python_cmd:
+            if self.deps.need == 1:
+                sys.exit("Can't build as 1 mandatory dependency is missing")
+            elif self.deps.need:
+                sys.exit(f"Can't build as {self.deps.need} mandatory dependencies are missing")
+
+        # Check if sphinx-build is called sphinx-build-3
+        if self.need_symlink:
+            sphinx_path = self.which("sphinx-build-3")
+            if sphinx_path:
+                print(f"\tsudo ln -sf {sphinx_path} /usr/bin/sphinx-build\n")
+
+        self.recommend_sphinx_version(virtualenv_cmd)
+        print("")
+
+        if not self.deps.optional:
+            print("All optional dependencies are met.")
+
+        if self.deps.need == 1:
+            sys.exit("Can't build as 1 mandatory dependency is missing")
+        elif self.deps.need:
+            sys.exit(f"Can't build as {self.deps.need} mandatory dependencies are missing")
+
+        print("Needed package dependencies are met.")
+
+DESCRIPTION = """
+Process some flags related to Sphinx installation and documentation build.
+"""
+
+
+def main():
+    """Parse the command line flags and run the dependency checks."""
+    parser = argparse.ArgumentParser(description=DESCRIPTION)
+
+    parser.add_argument(
+        "--no-virtualenv",
+        action="store_false",
+        dest="virtualenv",
+        help="Recommend installing Sphinx instead of using a virtualenv",
+    )
+
+    parser.add_argument(
+        "--no-pdf",
+        action="store_false",
+        dest="pdf",
+        help="Don't check for dependencies required to build PDF docs",
+    )
+
+    parser.add_argument(
+        "--version-check",
+        action="store_true",
+        dest="version_check",
+        help="If version is compatible, don't check for missing dependencies",
+    )
+
+    args = parser.parse_args()
+
+    checker = SphinxDependencyChecker(args)
+    # The Python version check runs after the checker has been created
+    PythonVersion.check_python(MIN_PYTHON_VERSION,
+                               bail_out=True, success_on_error=True)
+    checker.check_needs()
+
+# Call main if not used as module
+if __name__ == "__main__":
+    main()
diff --git a/tools/docs/test_doc_build.py b/tools/docs/test_doc_build.py
new file mode 100755
index 000000000000..47b4606569f9
--- /dev/null
+++ b/tools/docs/test_doc_build.py
@@ -0,0 +1,513 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+#
+# pylint: disable=R0903,R0912,R0913,R0914,R0917,C0301
+
+"""
+Install minimal supported requirements for different Sphinx versions
+and optionally test the build.
+"""
+
+import argparse
+import asyncio
+import os.path
+import shutil
+import sys
+import time
+import subprocess
+
+# PYTHON is the interpreter running this script. min_python_bin is the
+# lowest python3.x (3.9 to 3.12) found in PATH, falling back to PYTHON.
+PYTHON = os.path.basename(sys.executable)
+
+min_python_bin = None
+
+for i in range(9, 13):
+    p = f"python3.{i}"
+    if shutil.which(p):
+        min_python_bin = p
+        break
+
+if not min_python_bin:
+    min_python_bin = PYTHON
+
+# Starting from 8.0, Python 3.9 is not supported anymore.
+PYTHON_VER_CHANGES = {(8, 0, 0): PYTHON}
+
+DEFAULT_VERSIONS_TO_TEST = [
+    (3, 4, 3),   # Minimal supported version
+    (5, 3, 0),   # CentOS Stream 9 / AlmaLinux 9
+    (6, 1, 1),   # Debian 12
+    (7, 2, 1),   # openSUSE Leap 15.6
+    (7, 2, 6),   # Ubuntu 24.04 LTS
+    (7, 4, 7),   # Ubuntu 24.10
+    (7, 3, 0),   # openSUSE Tumbleweed
+    (8, 1, 3),   # Fedora 42
+    (8, 2, 3)    # Latest version - covers rolling distros
+]
+
+# Sphinx versions to be installed and their incremental requirements.
+# Each entry only lists what changes relative to the previous versions.
+SPHINX_REQUIREMENTS = {
+    # Oldest versions we support for each package required by Sphinx 3.4.3
+    (3, 4, 3): {
+        "alabaster": "0.7.12",
+        "babel": "2.8.0",
+        "certifi": "2020.6.20",
+        "docutils": "0.16",
+        "idna": "2.10",
+        "imagesize": "1.2.0",
+        "Jinja2": "2.11.2",
+        "MarkupSafe": "1.1.1",
+        "packaging": "20.4",
+        "Pygments": "2.6.1",
+        "PyYAML": "5.1",
+        "requests": "2.24.0",
+        "snowballstemmer": "2.0.0",
+        "sphinxcontrib-applehelp": "1.0.2",
+        "sphinxcontrib-devhelp": "1.0.2",
+        "sphinxcontrib-htmlhelp": "1.0.3",
+        "sphinxcontrib-jsmath": "1.0.1",
+        "sphinxcontrib-qthelp": "1.0.3",
+        "sphinxcontrib-serializinghtml": "1.1.4",
+        "urllib3": "1.25.9",
+    },
+
+    # Update package dependencies to a more modern base. The goal here
+    # is to avoid too many incremental changes for the next entries
+    (3, 5, 0): {
+        "alabaster": "0.7.13",
+        "babel": "2.17.0",
+        "certifi": "2025.6.15",
+        "idna": "3.10",
+        "imagesize": "1.4.1",
+        "packaging": "25.0",
+        "Pygments": "2.8.1",
+        "requests": "2.32.4",
+        "snowballstemmer": "3.0.1",
+        "sphinxcontrib-applehelp": "1.0.4",
+        "sphinxcontrib-htmlhelp": "2.0.1",
+        "sphinxcontrib-serializinghtml": "1.1.5",
+        "urllib3": "2.0.0",
+    },
+
+    # Starting from here, ensure all docutils versions are covered with
+    # supported Sphinx versions. Other packages are upgraded only when
+    # required by pip
+    (4, 0, 0): {
+        "PyYAML": "5.1",
+    },
+    (4, 1, 0): {
+        "docutils": "0.17",
+        "Pygments": "2.19.1",
+        "Jinja2": "3.0.3",
+        "MarkupSafe": "2.0",
+    },
+    (4, 3, 0): {},
+    (4, 4, 0): {},
+    (4, 5, 0): {
+        "docutils": "0.17.1",
+    },
+    (5, 0, 0): {},
+    (5, 1, 0): {},
+    (5, 2, 0): {
+        "docutils": "0.18",
+        "Jinja2": "3.1.2",
+        "MarkupSafe": "2.0",
+        "PyYAML": "5.3.1",
+    },
+    (5, 3, 0): {
+        "docutils": "0.18.1",
+    },
+    (6, 0, 0): {},
+    (6, 1, 0): {},
+    (6, 2, 0): {
+        "PyYAML": "5.4.1",
+    },
+    (7, 0, 0): {},
+    (7, 1, 0): {},
+    (7, 2, 0): {
+        "docutils": "0.19",
+        "PyYAML": "6.0.1",
+        "sphinxcontrib-serializinghtml": "1.1.9",
+    },
+    (7, 2, 6): {
+        "docutils": "0.20",
+    },
+    (7, 3, 0): {
+        "alabaster": "0.7.14",
+        "PyYAML": "6.0.1",
+        "tomli": "2.0.1",
+    },
+    (7, 4, 0): {
+        "docutils": "0.20.1",
+        "PyYAML": "6.0.1",
+    },
+    (8, 0, 0): {
+        "docutils": "0.21",
+    },
+    (8, 1, 0): {
+        "docutils": "0.21.1",
+        "PyYAML": "6.0.1",
+        "sphinxcontrib-applehelp": "1.0.7",
+        "sphinxcontrib-devhelp": "1.0.6",
+        "sphinxcontrib-htmlhelp": "2.0.6",
+        "sphinxcontrib-qthelp": "1.0.6",
+    },
+    (8, 2, 0): {
+        "docutils": "0.21.2",
+        "PyYAML": "6.0.1",
+        "sphinxcontrib-serializinghtml": "1.1.9",
+    },
+}
+
+
+class AsyncCommands:
+    """Execute commands asynchronously, capturing and logging their output"""
+
+    def __init__(self, fp=None):
+        """Start with empty capture buffers; fp is an optional open log file"""
+        self.stdout = None
+        self.stderr = None
+        self.output = None
+        self.fp = fp
+
+    def log(self, out, verbose, is_info=True):
+        """Print out when verbose and append it to the log file, if any"""
+        out = out.removesuffix('\n')
+        if verbose:
+            if is_info:
+                print(out)
+            else:
+                print(out, file=sys.stderr)
+
+        if self.fp:
+            self.fp.write(out + "\n")
+
+    async def _read(self, stream, verbose, is_info):
+        """Ancillary routine to capture while displaying"""
+
+        while stream is not None:
+            line = await stream.readline()
+            if line:
+                out = line.decode("utf-8", errors="backslashreplace")
+                self.log(out, verbose, is_info)
+                if is_info:
+                    self.stdout += out
+                else:
+                    self.stderr += out
+            else:
+                break
+
+    async def run(self, cmd, capture_output=False, check=False,
+                  env=None, verbose=True):
+        """
+        Execute an arbitrary command, handling errors.
+
+        With capture_output, return the captured stdout ("" on failure);
+        otherwise a CompletedProcess. With check, raise on failure.
+
+        Please notice that this class is not thread safe
+        """
+
+        self.stdout = ""
+        self.stderr = ""
+
+        self.log("$ " + " ".join(cmd), verbose)
+
+        proc = await asyncio.create_subprocess_exec(cmd[0],
+                                                    *cmd[1:],
+                                                    env=env,
+                                                    stdout=asyncio.subprocess.PIPE,
+                                                    stderr=asyncio.subprocess.PIPE)
+
+        # Handle input and output in realtime
+        await asyncio.gather(
+            self._read(proc.stdout, verbose, True),
+            self._read(proc.stderr, verbose, False),
+        )
+
+        await proc.wait()
+        # A negative return code means the command was killed by a signal
+        if check and proc.returncode != 0:
+            raise subprocess.CalledProcessError(returncode=proc.returncode,
+                                                cmd=" ".join(cmd),
+                                                output=self.stdout,
+                                                stderr=self.stderr)
+
+        if capture_output:
+            if proc.returncode != 0:
+                self.log(f"Error {proc.returncode}", verbose=True, is_info=False)
+                return ""
+
+            return self.stdout
+
+        return subprocess.CompletedProcess(args=cmd,
+                                           returncode=proc.returncode,
+                                           stdout=self.stdout,
+                                           stderr=self.stderr)
+
+
+class SphinxVenv:
+    """
+    Installs Sphinx on one virtual env per Sphinx version with a minimal
+    set of dependencies, adjusting them to each specific version.
+    """
+
+    def __init__(self):
+        """Initialize instance variables"""
+
+        self.built_time = {}
+        self.first_run = True
+
+    async def _handle_version(self, args, fp,
+                              cur_ver, cur_requirements, python_bin):
+        """Handle a single Sphinx version"""
+
+        cmd = AsyncCommands(fp)
+
+        ver = ".".join(map(str, cur_ver))
+        # Pause before every version but the first, when -i and -b are given
+        if not self.first_run and args.wait_input and args.build:
+            ret = input("Press Enter to continue or 'a' to abort: ").strip().lower()
+            if ret == "a":
+                print("Aborted.")
+                sys.exit()
+        else:
+            self.first_run = False
+
+        venv_dir = f"Sphinx_{ver}"
+        req_file = f"requirements_{ver}.txt"
+
+        cmd.log(f"\nSphinx {ver} with {python_bin}", verbose=True)
+
+        # Create venv
+        await cmd.run([python_bin, "-m", "venv", venv_dir],
+                      verbose=args.verbose, check=True)
+        pip = os.path.join(venv_dir, "bin/pip")
+
+        # Create install list
+        reqs = []
+        for pkg, verstr in cur_requirements.items():
+            reqs.append(f"{pkg}=={verstr}")
+
+        reqs.append(f"Sphinx=={ver}")
+
+        await cmd.run([pip, "install"] + reqs, check=True, verbose=args.verbose)
+
+        # Freeze environment
+        result = await cmd.run([pip, "freeze"], verbose=False, check=True)
+
+        # Pip install succeeded. Write requirements file
+        if args.req_file:
+            with open(req_file, "w", encoding="utf-8") as req_fp:
+                req_fp.write(result.stdout)
+
+        if args.build:
+            start_time = time.time()
+
+            # Prepare a venv environment
+            env = os.environ.copy()
+            bin_dir = os.path.join(venv_dir, "bin")
+            env["PATH"] = bin_dir + ":" + env["PATH"]
+            env["VIRTUAL_ENV"] = venv_dir
+            if "PYTHONHOME" in env:
+                del env["PYTHONHOME"]
+
+            # Test doc build
+            await cmd.run(["make", "cleandocs"], env=env, check=True)
+            make = ["make"]
+
+            if args.output:
+                sphinx_build = os.path.realpath(f"{bin_dir}/sphinx-build")
+                make += [f"O={args.output}", f"SPHINXBUILD={sphinx_build}"]
+
+            if args.make_args:
+                make += args.make_args
+
+            make += args.targets
+
+            if args.verbose:
+                cmd.log(f". {bin_dir}/activate", verbose=True)
+            await cmd.run(make, env=env, check=True, verbose=True)
+            if args.verbose:
+                cmd.log("deactivate", verbose=True)
+
+            end_time = time.time()
+            elapsed_time = end_time - start_time
+            hours, minutes = divmod(elapsed_time, 3600)
+            minutes, seconds = divmod(minutes, 60)
+
+            hours = int(hours)
+            minutes = int(minutes)
+            seconds = int(seconds)
+
+            self.built_time[ver] = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+
+            cmd.log(f"Finished doc build for Sphinx {ver}. Elapsed time: {self.built_time[ver]}", verbose=True)
+
+    async def run(self, args):
+        """
+        Navigate through multiple Sphinx versions, handling each of them
+        on a loop.
+        """
+
+        if args.log:
+            fp = open(args.log, "w", encoding="utf-8")
+        else:
+            # Without a log file, the console is the only output: be verbose
+            fp = None
+            args.verbose = True
+
+        # Close the log file even if a command fails or the user aborts
+        try:
+            cur_requirements = {}
+            python_bin = min_python_bin
+
+            vers = set(SPHINX_REQUIREMENTS.keys()) | set(args.versions)
+
+            for cur_ver in sorted(vers):
+                if cur_ver in SPHINX_REQUIREMENTS:
+                    new_reqs = SPHINX_REQUIREMENTS[cur_ver]
+                    cur_requirements.update(new_reqs)
+
+                if cur_ver in PYTHON_VER_CHANGES:      # pylint: disable=R1715
+                    python_bin = PYTHON_VER_CHANGES[cur_ver]
+
+                if cur_ver not in args.versions:
+                    continue
+
+                if args.min_version:
+                    if cur_ver < args.min_version:
+                        continue
+
+                if args.max_version:
+                    if cur_ver > args.max_version:
+                        break
+
+                await self._handle_version(args, fp, cur_ver,
+                                           cur_requirements, python_bin)
+
+            if args.build:
+                cmd = AsyncCommands(fp)
+                cmd.log("\nSummary:", verbose=True)
+                for ver, elapsed_time in sorted(self.built_time.items()):
+                    cmd.log(f"\tSphinx {ver} elapsed time: {elapsed_time}",
+                            verbose=True)
+        finally:
+            if fp:
+                fp.close()
+
+def parse_version(ver_str):
+    """Turn a dotted version string such as "3.4.3" into a tuple of ints."""
+
+    return tuple(int(field) for field in ver_str.split("."))
+
+
+DEFAULT_VERS = "    - "
+DEFAULT_VERS += "\n    - ".join(map(lambda v: f"{v[0]}.{v[1]}.{v[2]}",
+                                    DEFAULT_VERSIONS_TO_TEST))
+
+SCRIPT = os.path.relpath(__file__)
+
+DESCRIPTION = f"""
+This tool allows creating Python virtual environments for different
+Sphinx versions that are supported by the Linux Kernel build system.
+
+Besides creating the virtual environment, it can also test building
+the documentation using "make htmldocs" (and/or other doc targets).
+
+If called without "--versions" argument, it covers the versions shipped
+on major distros, plus the lowest supported version:
+
+{DEFAULT_VERS}
+
+A typical usage is to run:
+
+   {SCRIPT} -b -l sphinx_builds.log
+
+This will create one virtual env for the default version set and run
+"make htmldocs" for each version, creating a log file with the
+executed commands on it.
+
+NOTE: The build time can be very long, specially on old versions. Also, there
+is a known bug with Sphinx version 6.0.x: each subprocess uses a lot of
+memory. That, together with "-jauto" may cause OOM killer to cause
+failures at the doc generation. To minimize the risk, you may use the
+"-a" command line parameter to constrain the built directories and/or
+reduce the number of threads from "-jauto" to, for instance, "-j4":
+
+    {SCRIPT} -b -V 6.0.1 -a "SPHINXDIRS=process" "SPHINXOPTS='-j4'"
+
+"""
+
+MAKE_TARGETS = [
+    "htmldocs",
+    "texinfodocs",
+    "infodocs",
+    "latexdocs",
+    "pdfdocs",
+    "epubdocs",
+    "xmldocs",
+]
+
+async def main():
+    """Parse the command line and process the selected Sphinx versions"""
+
+    parser = argparse.ArgumentParser(description=DESCRIPTION,
+                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    ver_group = parser.add_argument_group("Version range options")
+
+    ver_group.add_argument('-V', '--versions', nargs="*",
+                           default=DEFAULT_VERSIONS_TO_TEST, type=parse_version,
+                           help='Sphinx versions to test')
+    ver_group.add_argument('--min-version', "--min", type=parse_version,
+                           help='Sphinx minimal version')
+    ver_group.add_argument('--max-version', "--max", type=parse_version,
+                           help='Sphinx maximum version')
+    ver_group.add_argument('-f', '--full', action='store_true',
+                           help='Add all Sphinx (major,minor) supported versions to the version range')
+
+    build_group = parser.add_argument_group("Build options")
+
+    build_group.add_argument('-b', '--build', action='store_true',
+                             help='Build documentation')
+    build_group.add_argument('-a', '--make-args', nargs="*",
+                             help='extra arguments for make, like SPHINXDIRS=netlink/specs',
+                             )
+    build_group.add_argument('-t', '--targets', nargs="+", choices=MAKE_TARGETS,
+                             default=[MAKE_TARGETS[0]],
+                             help="make build targets. Default: htmldocs.")
+    build_group.add_argument("-o", '--output',
+                             help="output directory for the make O=OUTPUT")
+
+    other_group = parser.add_argument_group("Other options")
+
+    other_group.add_argument('-r', '--req-file', action='store_true',
+                             help='write a requirements.txt file')
+    other_group.add_argument('-l', '--log',
+                             help='Log command output on a file')
+    other_group.add_argument('-v', '--verbose', action='store_true',
+                             help='Verbose all commands')
+    other_group.add_argument('-i', '--wait-input', action='store_true',
+                             help='Wait for an enter before going to the next version')
+
+    args = parser.parse_args()
+
+    if not args.make_args:
+        args.make_args = []
+
+    # Work on a copy: without -V, args.versions is DEFAULT_VERSIONS_TO_TEST
+    # itself, which must not be extended in place.
+    if args.full:
+        args.versions = list(args.versions) + list(SPHINX_REQUIREMENTS.keys())
+
+    venv = SphinxVenv()
+    await venv.run(args)
+
+
+# Call main method
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h
index e974ec932ec1..35f33780ca6c 100644
--- a/tools/include/asm-generic/bitops/__fls.h
+++ b/tools/include/asm-generic/bitops/__fls.h
@@ -10,7 +10,7 @@
  *
  * Undefined if no set bit exists, so code should check against 0 first.
  */
-static __always_inline unsigned int generic___fls(unsigned long word)
+static __always_inline __attribute_const__ unsigned int generic___fls(unsigned long word)
 {
 	unsigned int num = BITS_PER_LONG - 1;
 
diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h
index 26f3ce1dd6e4..8eed3437edb9 100644
--- a/tools/include/asm-generic/bitops/fls.h
+++ b/tools/include/asm-generic/bitops/fls.h
@@ -10,7 +10,7 @@
  * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
  */
 
-static __always_inline int generic_fls(unsigned int x)
+static __always_inline __attribute_const__ int generic_fls(unsigned int x)
 {
 	int r = 32;
 
diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h
index 866f2b2304ff..b5f58dd261a3 100644
--- a/tools/include/asm-generic/bitops/fls64.h
+++ b/tools/include/asm-generic/bitops/fls64.h
@@ -16,7 +16,7 @@
  * at position 64.
  */
 #if BITS_PER_LONG == 32
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
 {
 	__u32 h = x >> 32;
 	if (h)
@@ -24,7 +24,7 @@ static __always_inline int fls64(__u64 x)
 	return fls(x);
 }
 #elif BITS_PER_LONG == 64
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
 {
 	if (x == 0)
 		return 0;
diff --git a/tools/include/linux/interval_tree_generic.h b/tools/include/linux/interval_tree_generic.h
index aaa8a0767aa3..c5a2fed49eb0 100644
--- a/tools/include/linux/interval_tree_generic.h
+++ b/tools/include/linux/interval_tree_generic.h
@@ -77,7 +77,7 @@ ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node,			      \
  *   Cond2: start <= ITLAST(node)					      \
  */									      \
 									      \
-static ITSTRUCT *							      \
+ITSTATIC ITSTRUCT *							      \
 ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last)	      \
 {									      \
 	while (true) {							      \
@@ -104,12 +104,8 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last)	      \
 		if (ITSTART(node) <= last) {		/* Cond1 */	      \
 			if (start <= ITLAST(node))	/* Cond2 */	      \
 				return node;	/* node is leftmost match */  \
-			if (node->ITRB.rb_right) {			      \
-				node = rb_entry(node->ITRB.rb_right,	      \
-						ITSTRUCT, ITRB);	      \
-				if (start <= node->ITSUBTREE)		      \
-					continue;			      \
-			}						      \
+			node = rb_entry(node->ITRB.rb_right, ITSTRUCT, ITRB); \
+			continue;					      \
 		}							      \
 		return NULL;	/* No match */				      \
 	}								      \
diff --git a/tools/include/linux/livepatch_external.h b/tools/include/linux/livepatch_external.h
new file mode 100644
index 000000000000..138af19b0f5c
--- /dev/null
+++ b/tools/include/linux/livepatch_external.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * External livepatch interfaces for patch creation tooling
+ */
+
+#ifndef _LINUX_LIVEPATCH_EXTERNAL_H_
+#define _LINUX_LIVEPATCH_EXTERNAL_H_
+
+#include <linux/types.h>
+
+#define KLP_RELOC_SEC_PREFIX		".klp.rela."
+#define KLP_SYM_PREFIX			".klp.sym."
+
+#define __KLP_PRE_PATCH_PREFIX		__klp_pre_patch_callback_
+#define __KLP_POST_PATCH_PREFIX		__klp_post_patch_callback_
+#define __KLP_PRE_UNPATCH_PREFIX	__klp_pre_unpatch_callback_
+#define __KLP_POST_UNPATCH_PREFIX	__klp_post_unpatch_callback_
+
+#define KLP_PRE_PATCH_PREFIX		__stringify(__KLP_PRE_PATCH_PREFIX)
+#define KLP_POST_PATCH_PREFIX		__stringify(__KLP_POST_PATCH_PREFIX)
+#define KLP_PRE_UNPATCH_PREFIX		__stringify(__KLP_PRE_UNPATCH_PREFIX)
+#define KLP_POST_UNPATCH_PREFIX		__stringify(__KLP_POST_UNPATCH_PREFIX)
+
+struct klp_object;
+
+typedef int (*klp_pre_patch_t)(struct klp_object *obj);
+typedef void (*klp_post_patch_t)(struct klp_object *obj);
+typedef void (*klp_pre_unpatch_t)(struct klp_object *obj);
+typedef void (*klp_post_unpatch_t)(struct klp_object *obj);
+
+/**
+ * struct klp_callbacks - pre/post live-(un)patch callback structure
+ * @pre_patch:		executed before code patching
+ * @post_patch:		executed after code patching
+ * @pre_unpatch:	executed before code unpatching
+ * @post_unpatch:	executed after code unpatching
+ * @post_unpatch_enabled:	flag indicating if post-unpatch callback
+ *				should run
+ *
+ * All callbacks are optional.  Only the pre-patch callback, if provided,
+ * will be unconditionally executed.  If the parent klp_object fails to
+ * patch for any reason, including a non-zero error status returned from
+ * the pre-patch callback, no further callbacks will be executed.
+ */
+struct klp_callbacks {
+	klp_pre_patch_t		pre_patch;
+	klp_post_patch_t	post_patch;
+	klp_pre_unpatch_t	pre_unpatch;
+	klp_post_unpatch_t	post_unpatch;
+	bool post_unpatch_enabled;
+};
+
+/*
+ * 'struct klp_{func,object}_ext' are compact "external" representations of
+ * 'struct klp_{func,object}'.   They are used by objtool for livepatch
+ * generation.  The structs are then read by the livepatch module and converted
+ * to the real structs before calling klp_enable_patch().
+ *
+ * TODO make these the official API for klp_enable_patch().  That should
+ * simplify livepatch's interface as well as its data structure lifetime
+ * management.
+ */
+struct klp_func_ext {
+	const char *old_name;
+	void *new_func;
+	unsigned long sympos;
+};
+
+struct klp_object_ext {
+	const char *name;
+	struct klp_func_ext *funcs;
+	struct klp_callbacks callbacks;
+	unsigned int nr_funcs;
+};
+
+#endif /* _LINUX_LIVEPATCH_EXTERNAL_H_ */
diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h
index aceac94632c8..c6def4049b1a 100644
--- a/tools/include/linux/objtool_types.h
+++ b/tools/include/linux/objtool_types.h
@@ -67,4 +67,6 @@ struct unwind_hint {
 #define ANNOTYPE_REACHABLE		8
 #define ANNOTYPE_NOCFI			9
 
+#define ANNOTYPE_DATA_SPECIAL		1
+
 #endif /* _LINUX_OBJTOOL_TYPES_H */
diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
index 8499f509f03e..51ad3cf4fa82 100644
--- a/tools/include/linux/string.h
+++ b/tools/include/linux/string.h
@@ -44,6 +44,20 @@ static inline bool strstarts(const char *str, const char *prefix)
 	return strncmp(str, prefix, strlen(prefix)) == 0;
 }
 
+/*
+ * Returns true if @str ends with @substr (always true for an empty @substr).
+ */
+static inline bool str_ends_with(const char *str, const char *substr)
+{
+	size_t sublen = strlen(substr);
+	size_t len = strlen(str);
+
+	if (len < sublen)
+		return false;
+
+	return strcmp(str + (len - sublen), substr) == 0;
+}
+
 extern char * __must_check skip_spaces(const char *);
 
 extern char *strim(char *);
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index 143c2d2c2ba6..8118e22844f1 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -23,7 +23,7 @@ else
 Q=@
 endif
 
-arch_file := arch-$(ARCH).h
+arch_files := arch.h $(wildcard arch-*.h)
 all_files := \
 		compiler.h \
 		crt.h \
@@ -33,6 +33,7 @@ all_files := \
 		errno.h \
 		fcntl.h \
 		getopt.h \
+		inttypes.h \
 		limits.h \
 		math.h \
 		nolibc.h \
@@ -56,12 +57,14 @@ all_files := \
 		sys/random.h \
 		sys/reboot.h \
 		sys/resource.h \
+		sys/select.h \
 		sys/stat.h \
 		sys/syscall.h \
 		sys/sysmacros.h \
 		sys/time.h \
 		sys/timerfd.h \
 		sys/types.h \
+		sys/uio.h \
 		sys/utsname.h \
 		sys/wait.h \
 		time.h \
@@ -79,7 +82,7 @@ help:
 	@echo "Supported targets under nolibc:"
 	@echo "  all                 call \"headers\""
 	@echo "  clean               clean the sysroot"
-	@echo "  headers             prepare a sysroot in tools/include/nolibc/sysroot"
+	@echo "  headers             prepare a multi-arch sysroot in \$${OUTPUT}sysroot"
 	@echo "  headers_standalone  like \"headers\", and also install kernel headers"
 	@echo "  help                this help"
 	@echo ""
@@ -90,18 +93,11 @@ help:
 	@echo "  OUTPUT  = $(OUTPUT)"
 	@echo ""
 
+# installs headers for all archs at once.
 headers:
-	$(Q)mkdir -p $(OUTPUT)sysroot
-	$(Q)mkdir -p $(OUTPUT)sysroot/include
-	$(Q)cp --parents $(all_files) $(OUTPUT)sysroot/include/
-	$(Q)if [ "$(ARCH)" = "i386" -o "$(ARCH)" = "x86_64" ]; then \
-		cat arch-x86.h;                 \
-	elif [ -e "$(arch_file)" ]; then        \
-		cat $(arch_file);               \
-	else                                    \
-		echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \
-		exit 1;                         \
-	fi > $(OUTPUT)sysroot/include/arch.h
+	$(Q)mkdir -p "$(OUTPUT)sysroot"
+	$(Q)mkdir -p "$(OUTPUT)sysroot/include"
+	$(Q)cp --parents $(arch_files) $(all_files) "$(OUTPUT)sysroot/include/"
 
 headers_standalone: headers
 	$(Q)$(MAKE) -C $(srctree) headers
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index 1f66e7e5a444..251c42579028 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -184,6 +184,7 @@
 	_arg1;                                                                \
 })
 
+#ifndef NOLIBC_NO_RUNTIME
 /* startup code */
 void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
 {
@@ -193,5 +194,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
 	);
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 #endif /* _NOLIBC_ARCH_ARM_H */
diff --git a/tools/include/nolibc/arch-arm64.h b/tools/include/nolibc/arch-arm64.h
index 02a3f74c8ec8..080a55a7144e 100644
--- a/tools/include/nolibc/arch-arm64.h
+++ b/tools/include/nolibc/arch-arm64.h
@@ -141,6 +141,7 @@
 	_arg1;                                                                \
 })
 
+#ifndef NOLIBC_NO_RUNTIME
 /* startup code */
 void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
 {
@@ -150,4 +151,5 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
 	);
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 #endif /* _NOLIBC_ARCH_ARM64_H */
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index 5511705303ea..c894176c3f89 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -142,6 +142,7 @@
 	_arg1;                                                                \
 })
 
+#ifndef NOLIBC_NO_RUNTIME
 /* startup code */
 void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
 {
@@ -151,5 +152,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
 	);
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 #endif /* _NOLIBC_ARCH_LOONGARCH_H */
diff --git a/tools/include/nolibc/arch-m68k.h b/tools/include/nolibc/arch-m68k.h
index 6dac1845f298..2a4fbada5e79 100644
--- a/tools/include/nolibc/arch-m68k.h
+++ b/tools/include/nolibc/arch-m68k.h
@@ -128,6 +128,7 @@
 	_num;                                                                 \
 })
 
+#ifndef NOLIBC_NO_RUNTIME
 void _start(void);
 void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
 {
@@ -137,5 +138,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
 	);
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 #endif /* _NOLIBC_ARCH_M68K_H */
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 0cbac63b249a..a72506ceec6b 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -245,6 +245,7 @@
 
 #endif /* _ABIO32 */
 
+#ifndef NOLIBC_NO_RUNTIME
 /* startup code, note that it's called __start on MIPS */
 void __start(void);
 void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void)
@@ -266,5 +267,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __
 	);
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 #endif /* _NOLIBC_ARCH_MIPS_H */
diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h
index 204564bbcd32..e0c7e0b81f7c 100644
--- a/tools/include/nolibc/arch-powerpc.h
+++ b/tools/include/nolibc/arch-powerpc.h
@@ -183,6 +183,7 @@
 #endif
 #endif /* !__powerpc64__ */
 
+#ifndef NOLIBC_NO_RUNTIME
 /* startup code */
 void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
 {
@@ -215,5 +216,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
 #endif
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 #endif /* _NOLIBC_ARCH_POWERPC_H */
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index 885383a86c38..1c00cacf57e1 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -139,6 +139,7 @@
 	_arg1;                                                                \
 })
 
+#ifndef NOLIBC_NO_RUNTIME
 /* startup code */
 void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
 {
@@ -152,5 +153,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
 	);
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 #endif /* _NOLIBC_ARCH_RISCV_H */
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index df4c3cc713ac..74125a254ce3 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -139,22 +139,19 @@
 	_arg1;								\
 })
 
+#ifndef NOLIBC_NO_RUNTIME
 /* startup code */
 void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
 {
 	__asm__ volatile (
-#ifdef __s390x__
 		"lgr	%r2, %r15\n"          /* save stack pointer to %r2, as arg1 of _start_c */
 		"aghi	%r15, -160\n"         /* allocate new stackframe                        */
-#else
-		"lr	%r2, %r15\n"
-		"ahi	%r15, -96\n"
-#endif
 		"xc	0(8,%r15), 0(%r15)\n" /* clear backchain                                */
 		"brasl	%r14, _start_c\n"     /* transfer to c runtime                          */
 	);
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 struct s390_mmap_arg_struct {
 	unsigned long addr;
diff --git a/tools/include/nolibc/arch-sh.h b/tools/include/nolibc/arch-sh.h
index a96b8914607e..7a421197d104 100644
--- a/tools/include/nolibc/arch-sh.h
+++ b/tools/include/nolibc/arch-sh.h
@@ -140,6 +140,7 @@
 	_ret;                                                                 \
 })
 
+#ifndef NOLIBC_NO_RUNTIME
 /* startup code */
 void _start_wrapper(void);
 void __attribute__((weak,noreturn)) __nolibc_entrypoint __no_stack_protector _start_wrapper(void)
@@ -158,5 +159,6 @@ void __attribute__((weak,noreturn)) __nolibc_entrypoint __no_stack_protector _st
 	);
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 #endif /* _NOLIBC_ARCH_SH_H */
diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h
index ca420d843e25..2ebb5686e105 100644
--- a/tools/include/nolibc/arch-sparc.h
+++ b/tools/include/nolibc/arch-sparc.h
@@ -152,6 +152,7 @@
 	_arg1;                                                                \
 })
 
+#ifndef NOLIBC_NO_RUNTIME
 /* startup code */
 void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
 {
@@ -169,6 +170,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
 	);
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 static pid_t getpid(void);
 
diff --git a/tools/include/nolibc/arch-x86.h b/tools/include/nolibc/arch-x86.h
index d3efc0c3b8ad..f6c43ac5377b 100644
--- a/tools/include/nolibc/arch-x86.h
+++ b/tools/include/nolibc/arch-x86.h
@@ -157,6 +157,7 @@
 	_eax;							\
 })
 
+#ifndef NOLIBC_NO_RUNTIME
 /* startup code */
 /*
  * i386 System V ABI mandates:
@@ -176,6 +177,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
 	);
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 #else /* !defined(__x86_64__) */
 
@@ -323,6 +325,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
 	_ret;                                                                 \
 })
 
+#ifndef NOLIBC_NO_RUNTIME
 /* startup code */
 /*
  * x86-64 System V ABI mandates:
@@ -340,6 +343,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
 	);
 	__nolibc_entrypoint_epilogue();
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 #define NOLIBC_ARCH_HAS_MEMMOVE
 void *memmove(void *dst, const void *src, size_t len);
@@ -351,7 +355,7 @@ void *memcpy(void *dst, const void *src, size_t len);
 void *memset(void *dst, int c, size_t len);
 
 __asm__ (
-".section .text.nolibc_memmove_memcpy\n"
+".pushsection .text.nolibc_memmove_memcpy\n"
 ".weak memmove\n"
 ".weak memcpy\n"
 "memmove:\n"
@@ -371,8 +375,9 @@ __asm__ (
 	"rep movsb\n\t"
 	"cld\n\t"
 	"retq\n"
+".popsection\n"
 
-".section .text.nolibc_memset\n"
+".pushsection .text.nolibc_memset\n"
 ".weak memset\n"
 "memset:\n"
 	"xchgl %eax, %esi\n\t"
@@ -381,6 +386,7 @@ __asm__ (
 	"rep stosb\n\t"
 	"popq  %rax\n\t"
 	"retq\n"
+".popsection\n"
 );
 
 #endif /* !defined(__x86_64__) */
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index 426c89198135..a3adaf433f2c 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -3,15 +3,6 @@
  * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
  */
 
-/* Below comes the architecture-specific code. For each architecture, we have
- * the syscall declarations and the _start code definition. This is the only
- * global part. On all architectures the kernel puts everything in the stack
- * before jumping to _start just above us, without any return address (_start
- * is not a function but an entry point). So at the stack pointer we find argc.
- * Then argv[] begins, and ends at the first NULL. Then we have envp which
- * starts and ends with a NULL as well. So envp=argv+argc+1.
- */
-
 #ifndef _NOLIBC_ARCH_H
 #define _NOLIBC_ARCH_H
 
@@ -27,7 +18,7 @@
 #include "arch-powerpc.h"
 #elif defined(__riscv)
 #include "arch-riscv.h"
-#elif defined(__s390x__) || defined(__s390__)
+#elif defined(__s390x__)
 #include "arch-s390.h"
 #elif defined(__loongarch__)
 #include "arch-loongarch.h"
diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h
index 369cfb5a0e78..87090bbc53e0 100644
--- a/tools/include/nolibc/compiler.h
+++ b/tools/include/nolibc/compiler.h
@@ -41,8 +41,8 @@
 #  define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
 #endif /* __nolibc_has_attribute(no_stack_protector) */
 
-#if __nolibc_has_attribute(fallthrough)
-#  define __nolibc_fallthrough do { } while (0); __attribute__((fallthrough))
+#if __nolibc_has_attribute(__fallthrough__)
+#  define __nolibc_fallthrough do { } while (0); __attribute__((__fallthrough__))
 #else
 #  define __nolibc_fallthrough do { } while (0)
 #endif /* __nolibc_has_attribute(fallthrough) */
diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index 961cfe777c35..d9262998dae9 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -7,6 +7,8 @@
 #ifndef _NOLIBC_CRT_H
 #define _NOLIBC_CRT_H
 
+#ifndef NOLIBC_NO_RUNTIME
+
 #include "compiler.h"
 
 char **environ __attribute__((weak));
@@ -88,4 +90,5 @@ void _start_c(long *sp)
 	exit(exitcode);
 }
 
+#endif /* NOLIBC_NO_RUNTIME */
 #endif /* _NOLIBC_CRT_H */
diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h
index 758b95c48e7a..61a122a60327 100644
--- a/tools/include/nolibc/dirent.h
+++ b/tools/include/nolibc/dirent.h
@@ -86,9 +86,9 @@ int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
 	 * readdir() can only return one entry at a time.
 	 * Make sure the non-returned ones are not skipped.
 	 */
-	ret = lseek(fd, ldir->d_off, SEEK_SET);
-	if (ret == -1)
-		return errno;
+	ret = sys_lseek(fd, ldir->d_off, SEEK_SET);
+	if (ret < 0)
+		return -ret;
 
 	entry->d_ino = ldir->d_ino;
 	/* the destination should always be big enough */
diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h
index 217abb95264b..87565e3b6a33 100644
--- a/tools/include/nolibc/getopt.h
+++ b/tools/include/nolibc/getopt.h
@@ -78,7 +78,7 @@ int getopt(int argc, char * const argv[], const char *optstring)
 		return '?';
 	}
 	if (optstring[i] == ':') {
-		optarg = 0;
+		optarg = NULL;
 		if (optstring[i + 1] != ':' || __optpos) {
 			optarg = argv[optind++];
 			if (__optpos)
diff --git a/tools/include/nolibc/inttypes.h b/tools/include/nolibc/inttypes.h
new file mode 100644
index 000000000000..1977bd74bfeb
--- /dev/null
+++ b/tools/include/nolibc/inttypes.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "nolibc.h"
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index d2f5aa085f8e..272dfc961158 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -104,11 +104,13 @@
 #include "sys/random.h"
 #include "sys/reboot.h"
 #include "sys/resource.h"
+#include "sys/select.h"
 #include "sys/stat.h"
 #include "sys/syscall.h"
 #include "sys/sysmacros.h"
 #include "sys/time.h"
 #include "sys/timerfd.h"
+#include "sys/uio.h"
 #include "sys/utsname.h"
 #include "sys/wait.h"
 #include "ctype.h"
diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
index c71a2c257177..7123aa056cb0 100644
--- a/tools/include/nolibc/stackprotector.h
+++ b/tools/include/nolibc/stackprotector.h
@@ -9,6 +9,7 @@
 
 #include "compiler.h"
 
+#ifndef NOLIBC_NO_RUNTIME
 #if defined(_NOLIBC_STACKPROTECTOR)
 
 #include "sys.h"
@@ -49,5 +50,6 @@ static __no_stack_protector void __stack_chk_init(void)
 #else /* !defined(_NOLIBC_STACKPROTECTOR) */
 static void __stack_chk_init(void) {}
 #endif /* defined(_NOLIBC_STACKPROTECTOR) */
+#endif /* NOLIBC_NO_RUNTIME */
 
 #endif /* _NOLIBC_STACKPROTECTOR_H */
diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
index 2c1ad23b9b5c..392f4dd94158 100644
--- a/tools/include/nolibc/std.h
+++ b/tools/include/nolibc/std.h
@@ -20,13 +20,13 @@
 
 /* those are commonly provided by sys/types.h */
 typedef unsigned int          dev_t;
-typedef unsigned long         ino_t;
+typedef uint64_t              ino_t;
 typedef unsigned int         mode_t;
 typedef   signed int          pid_t;
 typedef unsigned int          uid_t;
 typedef unsigned int          gid_t;
 typedef unsigned long       nlink_t;
-typedef   signed long         off_t;
+typedef  int64_t              off_t;
 typedef   signed long     blksize_t;
 typedef   signed long      blkcnt_t;
 typedef __kernel_time_t      time_t;
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index 7630234408c5..1f16dab2ac88 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -321,11 +321,13 @@ int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char
 				if (!outstr)
 					outstr="(null)";
 			}
-#ifndef NOLIBC_IGNORE_ERRNO
 			else if (c == 'm') {
+#ifdef NOLIBC_IGNORE_ERRNO
+				outstr = "unknown error";
+#else
 				outstr = strerror(errno);
-			}
 #endif /* NOLIBC_IGNORE_ERRNO */
+			}
 			else if (c == '%') {
 				/* queue it verbatim */
 				continue;
@@ -600,7 +602,11 @@ int sscanf(const char *str, const char *format, ...)
 static __attribute__((unused))
 void perror(const char *msg)
 {
+#ifdef NOLIBC_IGNORE_ERRNO
+	fprintf(stderr, "%s%sunknown error\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "");
+#else
 	fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
+#endif
 }
 
 static __attribute__((unused))
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 5fd99a480f82..f184e108ed0a 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -100,6 +100,7 @@ void free(void *ptr)
 	munmap(heap, heap->len);
 }
 
+#ifndef NOLIBC_NO_RUNTIME
 /* getenv() tries to find the environment variable named <name> in the
  * environment array pointed to by global variable "environ" which must be
  * declared as a char **, and must be terminated by a NULL (it is recommended
@@ -122,6 +123,7 @@ char *getenv(const char *name)
 	}
 	return NULL;
 }
+#endif /* NOLIBC_NO_RUNTIME */
 
 static __attribute__((unused))
 void *malloc(size_t len)
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index 163a17e7dd38..4000926f44ac 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -93,6 +93,21 @@ void *memset(void *dst, int b, size_t len)
 }
 #endif /* #ifndef NOLIBC_ARCH_HAS_MEMSET */
 
+#ifndef NOLIBC_ARCH_HAS_MEMCHR
+/* returns the first of the <len> bytes at <s> equal to (char)<c>, or NULL */
+static __attribute__((unused))
+void *memchr(const void *s, int c, size_t len)
+{
+	const char *bytes = s;
+	size_t pos;
+
+	for (pos = 0; pos < len; pos++)
+		if (bytes[pos] == (char)c)
+			return (void *)(bytes + pos);
+	return NULL;
+}
+#endif /* #ifndef NOLIBC_ARCH_HAS_MEMCHR */
+
 static __attribute__((unused))
 char *strchr(const char *s, int c)
 {
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index c5564f57deec..847af1ccbdc9 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -106,7 +106,7 @@ static __attribute__((unused))
 void *sbrk(intptr_t inc)
 {
 	/* first call to find current end */
-	void *ret = sys_brk(0);
+	void *ret = sys_brk(NULL);
 
 	if (ret && sys_brk(ret + inc) == ret + inc)
 		return ret + inc;
@@ -118,6 +118,7 @@ void *sbrk(intptr_t inc)
 
 /*
  * int chdir(const char *path);
+ * int fchdir(int fildes);
  */
 
 static __attribute__((unused))
@@ -132,6 +133,18 @@ int chdir(const char *path)
 	return __sysret(sys_chdir(path));
 }
 
+static __attribute__((unused))
+int sys_fchdir(int fildes)
+{
+	return my_syscall1(__NR_fchdir, fildes);
+}
+
+static __attribute__((unused))
+int fchdir(int fildes)
+{
+	return __sysret(sys_fchdir(fildes));
+}
+
 
 /*
  * int chmod(const char *path, mode_t mode);
@@ -512,6 +525,7 @@ pid_t gettid(void)
 	return sys_gettid();
 }
 
+#ifndef NOLIBC_NO_RUNTIME
 static unsigned long getauxval(unsigned long key);
 
 /*
@@ -523,7 +537,7 @@ int getpagesize(void)
 {
 	return __sysret((int)getauxval(AT_PAGESZ) ?: -ENOENT);
 }
-
+#endif /* NOLIBC_NO_RUNTIME */
 
 /*
  * uid_t getuid(void);
@@ -591,23 +605,20 @@ int link(const char *old, const char *new)
 static __attribute__((unused))
 off_t sys_lseek(int fd, off_t offset, int whence)
 {
-#if defined(__NR_lseek)
-	return my_syscall3(__NR_lseek, fd, offset, whence);
-#else
+#if defined(__NR_llseek)
 	__kernel_loff_t loff = 0;
 	off_t result;
 	int ret;
 
-	/* Only exists on 32bit where nolibc off_t is also 32bit */
-	ret = my_syscall5(__NR_llseek, fd, 0, offset, &loff, whence);
+	ret = my_syscall5(__NR_llseek, fd, offset >> 32, (uint32_t)offset, &loff, whence);
 	if (ret < 0)
 		result = ret;
-	else if (loff != (off_t)loff)
-		result = -EOVERFLOW;
 	else
 		result = loff;
 
 	return result;
+#else
+	return my_syscall3(__NR_lseek, fd, offset, whence);
 #endif
 }
 
@@ -756,51 +767,6 @@ int sched_yield(void)
 
 
 /*
- * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
- *            fd_set *except_fds, struct timeval *timeout);
- */
-
-static __attribute__((unused))
-int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
-#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
-	struct sel_arg_struct {
-		unsigned long n;
-		fd_set *r, *w, *e;
-		struct timeval *t;
-	} arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
-	return my_syscall1(__NR_select, &arg);
-#elif defined(__NR__newselect)
-	return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
-#elif defined(__NR_select)
-	return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout);
-#elif defined(__NR_pselect6)
-	struct timespec t;
-
-	if (timeout) {
-		t.tv_sec  = timeout->tv_sec;
-		t.tv_nsec = timeout->tv_usec * 1000;
-	}
-	return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#else
-	struct __kernel_timespec t;
-
-	if (timeout) {
-		t.tv_sec  = timeout->tv_sec;
-		t.tv_nsec = timeout->tv_usec * 1000;
-	}
-	return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#endif
-}
-
-static __attribute__((unused))
-int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
-	return __sysret(sys_select(nfds, rfds, wfds, efds, timeout));
-}
-
-
-/*
  * int setpgid(pid_t pid, pid_t pgid);
  */
 
diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h
index c52463d6c18d..0e98325e7347 100644
--- a/tools/include/nolibc/sys/auxv.h
+++ b/tools/include/nolibc/sys/auxv.h
@@ -10,6 +10,8 @@
 #ifndef _NOLIBC_SYS_AUXV_H
 #define _NOLIBC_SYS_AUXV_H
 
+#ifndef NOLIBC_NO_RUNTIME
+
 #include "../crt.h"
 
 static __attribute__((unused))
@@ -38,4 +40,5 @@ unsigned long getauxval(unsigned long type)
 	return ret;
 }
 
+#endif /* NOLIBC_NO_RUNTIME */
 #endif /* _NOLIBC_SYS_AUXV_H */
diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h
index 5228751b458c..77084ac3405a 100644
--- a/tools/include/nolibc/sys/mman.h
+++ b/tools/include/nolibc/sys/mman.h
@@ -31,11 +31,6 @@ void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
 }
 #endif
 
-/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret()
- * which returns -1 upon error and still satisfy user land that checks for
- * MAP_FAILED.
- */
-
 static __attribute__((unused))
 void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
 {
diff --git a/tools/include/nolibc/sys/reboot.h b/tools/include/nolibc/sys/reboot.h
index 4a1e435be669..38274c64a722 100644
--- a/tools/include/nolibc/sys/reboot.h
+++ b/tools/include/nolibc/sys/reboot.h
@@ -28,7 +28,7 @@ ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
 static __attribute__((unused))
 int reboot(int cmd)
 {
-	return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0));
+	return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, NULL));
 }
 
 #endif /* _NOLIBC_SYS_REBOOT_H */
diff --git a/tools/include/nolibc/sys/select.h b/tools/include/nolibc/sys/select.h
new file mode 100644
index 000000000000..2a5619c01277
--- /dev/null
+++ b/tools/include/nolibc/sys/select.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_SELECT_H
+#define _NOLIBC_SYS_SELECT_H
+
+#include <linux/time.h>
+#include <linux/unistd.h>
+
+/* commonly an fd_set represents 256 FDs */
+#ifndef FD_SETSIZE
+#define FD_SETSIZE     256
+#endif
+
+#define FD_SETIDXMASK (8 * sizeof(unsigned long))	/* bits per fds[] word */
+#define FD_SETBITMASK (8 * sizeof(unsigned long)-1)	/* bit index mask within a word */
+
+/* for select(): one bit per FD in unsigned long words; masks must be long-wide (1UL) */
+typedef struct {
+	unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK];
+} fd_set;
+
+#define FD_CLR(fd, set) do {						\
+		fd_set *__set = (set);					\
+		int __fd = (fd);					\
+		if (__fd >= 0)						\
+			__set->fds[__fd / FD_SETIDXMASK] &=		\
+				~(1UL << (__fd & FD_SETBITMASK));	\
+	} while (0)
+
+#define FD_SET(fd, set) do {						\
+		fd_set *__set = (set);					\
+		int __fd = (fd);					\
+		if (__fd >= 0)						\
+			__set->fds[__fd / FD_SETIDXMASK] |=		\
+				1UL << (__fd & FD_SETBITMASK);		\
+	} while (0)
+
+#define FD_ISSET(fd, set) ({						\
+		fd_set *__set = (set);					\
+		int __fd = (fd);					\
+		int __r = 0;						\
+		if (__fd >= 0)						\
+			__r = !!(__set->fds[__fd / FD_SETIDXMASK] &	\
+				1UL << (__fd & FD_SETBITMASK));		\
+		__r;							\
+	})
+
+#define FD_ZERO(set) do {						\
+		fd_set *__set = (set);					\
+		int __idx;						\
+		int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\
+		for (__idx = 0; __idx < __size; __idx++)		\
+			__set->fds[__idx] = 0;				\
+	} while (0)
+
+/*
+ * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
+ *            fd_set *except_fds, struct timeval *timeout);
+ */
+
+static __attribute__((unused))
+int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
+	struct sel_arg_struct {
+		unsigned long n;
+		fd_set *r, *w, *e;
+		struct timeval *t;
+	} arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
+	return my_syscall1(__NR_select, &arg);
+#elif defined(__NR__newselect)
+	return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
+#elif defined(__NR_select)
+	return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout);
+#elif defined(__NR_pselect6)
+	struct timespec t;
+
+	if (timeout) {
+		t.tv_sec  = timeout->tv_sec;
+		t.tv_nsec = timeout->tv_usec * 1000;
+	}
+	return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#else
+	struct __kernel_timespec t;
+
+	if (timeout) {
+		t.tv_sec  = timeout->tv_sec;
+		t.tv_nsec = timeout->tv_usec * 1000;
+	}
+	return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#endif
+}
+
+static __attribute__((unused))
+int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+	return __sysret(sys_select(nfds, rfds, wfds, efds, timeout));
+}
+
+
+#endif /* _NOLIBC_SYS_SELECT_H */
diff --git a/tools/include/nolibc/sys/uio.h b/tools/include/nolibc/sys/uio.h
new file mode 100644
index 000000000000..7ad42b927d2f
--- /dev/null
+++ b/tools/include/nolibc/sys/uio.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * uio for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ * Copyright (C) 2025 Intel Corporation
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_UIO_H
+#define _NOLIBC_SYS_UIO_H
+
+#include "../sys.h"
+#include <linux/uio.h>
+
+
+/*
+ * ssize_t readv(int fd, const struct iovec *iovec, int count);
+ */
+static __attribute__((unused))
+ssize_t sys_readv(int fd, const struct iovec *iovec, int count)
+{
+	return my_syscall3(__NR_readv, fd, iovec, count);
+}
+
+static __attribute__((unused))
+ssize_t readv(int fd, const struct iovec *iovec, int count)
+{
+	return __sysret(sys_readv(fd, iovec, count));
+}
+
+/*
+ * ssize_t writev(int fd, const struct iovec *iovec, int count);
+ */
+static __attribute__((unused))
+ssize_t sys_writev(int fd, const struct iovec *iovec, int count)
+{
+	return my_syscall3(__NR_writev, fd, iovec, count);
+}
+
+static __attribute__((unused))
+ssize_t writev(int fd, const struct iovec *iovec, int count)
+{
+	return __sysret(sys_writev(fd, iovec, count));
+}
+
+
+#endif /* _NOLIBC_SYS_UIO_H */
diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h
index 4e66e1f7a03e..9d9319ba92cb 100644
--- a/tools/include/nolibc/sys/wait.h
+++ b/tools/include/nolibc/sys/wait.h
@@ -65,23 +65,29 @@ pid_t waitpid(pid_t pid, int *status, int options)
 
 	switch (info.si_code) {
 	case 0:
-		*status = 0;
+		if (status)
+			*status = 0;
 		break;
 	case CLD_EXITED:
-		*status = (info.si_status & 0xff) << 8;
+		if (status)
+			*status = (info.si_status & 0xff) << 8;
 		break;
 	case CLD_KILLED:
-		*status = info.si_status & 0x7f;
+		if (status)
+			*status = info.si_status & 0x7f;
 		break;
 	case CLD_DUMPED:
-		*status = (info.si_status & 0x7f) | 0x80;
+		if (status)
+			*status = (info.si_status & 0x7f) | 0x80;
 		break;
 	case CLD_STOPPED:
 	case CLD_TRAPPED:
-		*status = (info.si_status << 8) + 0x7f;
+		if (status)
+			*status = (info.si_status << 8) + 0x7f;
 		break;
 	case CLD_CONTINUED:
-		*status = 0xffff;
+		if (status)
+			*status = 0xffff;
 		break;
 	default:
 		return -1;
diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h
index 6c276b8d646a..48e78f8becf9 100644
--- a/tools/include/nolibc/time.h
+++ b/tools/include/nolibc/time.h
@@ -89,13 +89,11 @@ int sys_clock_settime(clockid_t clockid, struct timespec *tp)
 {
 #if defined(__NR_clock_settime)
 	return my_syscall2(__NR_clock_settime, clockid, tp);
-#elif defined(__NR_clock_settime64)
+#else
 	struct __kernel_timespec ktp;
 
 	__nolibc_timespec_user_to_kernel(tp, &ktp);
 	return my_syscall2(__NR_clock_settime64, clockid, &ktp);
-#else
-	return __nolibc_enosys(__func__, clockid, tp);
 #endif
 }
 
@@ -111,7 +109,7 @@ int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqt
 {
 #if defined(__NR_clock_nanosleep)
 	return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp);
-#elif defined(__NR_clock_nanosleep_time64)
+#else
 	struct __kernel_timespec krqtp, krmtp;
 	int ret;
 
@@ -120,8 +118,6 @@ int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqt
 	if (rmtp)
 		__nolibc_timespec_kernel_to_user(&krmtp, rmtp);
 	return ret;
-#else
-	return __nolibc_enosys(__func__, clockid, flags, rqtp, rmtp);
 #endif
 }
 
@@ -195,7 +191,7 @@ int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value)
 {
 #if defined(__NR_timer_gettime)
 	return my_syscall2(__NR_timer_gettime, timerid, curr_value);
-#elif defined(__NR_timer_gettime64)
+#else
 	struct __kernel_itimerspec kcurr_value;
 	int ret;
 
@@ -203,8 +199,6 @@ int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value)
 	__nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval);
 	__nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value);
 	return ret;
-#else
-	return __nolibc_enosys(__func__, timerid, curr_value);
 #endif
 }
 
@@ -220,7 +214,7 @@ int sys_timer_settime(timer_t timerid, int flags,
 {
 #if defined(__NR_timer_settime)
 	return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value);
-#elif defined(__NR_timer_settime64)
+#else
 	struct __kernel_itimerspec knew_value, kold_value;
 	int ret;
 
@@ -232,8 +226,6 @@ int sys_timer_settime(timer_t timerid, int flags,
 		__nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value);
 	}
 	return ret;
-#else
-	return __nolibc_enosys(__func__, timerid, flags, new_value, old_value);
 #endif
 }
 
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index 16c6e9ec9451..470a5f77bc0f 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -70,11 +70,6 @@
 #define DT_LNK         0xa
 #define DT_SOCK        0xc
 
-/* commonly an fd_set represents 256 FDs */
-#ifndef FD_SETSIZE
-#define FD_SETSIZE     256
-#endif
-
 /* PATH_MAX and MAXPATHLEN are often used and found with plenty of different
  * values.
  */
@@ -115,48 +110,6 @@
 #define EXIT_SUCCESS 0
 #define EXIT_FAILURE 1
 
-#define FD_SETIDXMASK (8 * sizeof(unsigned long))
-#define FD_SETBITMASK (8 * sizeof(unsigned long)-1)
-
-/* for select() */
-typedef struct {
-	unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK];
-} fd_set;
-
-#define FD_CLR(fd, set) do {						\
-		fd_set *__set = (set);					\
-		int __fd = (fd);					\
-		if (__fd >= 0)						\
-			__set->fds[__fd / FD_SETIDXMASK] &=		\
-				~(1U << (__fd & FD_SETBITMASK));	\
-	} while (0)
-
-#define FD_SET(fd, set) do {						\
-		fd_set *__set = (set);					\
-		int __fd = (fd);					\
-		if (__fd >= 0)						\
-			__set->fds[__fd / FD_SETIDXMASK] |=		\
-				1 << (__fd & FD_SETBITMASK);		\
-	} while (0)
-
-#define FD_ISSET(fd, set) ({						\
-			fd_set *__set = (set);				\
-			int __fd = (fd);				\
-		int __r = 0;						\
-		if (__fd >= 0)						\
-			__r = !!(__set->fds[__fd / FD_SETIDXMASK] &	\
-1U << (__fd & FD_SETBITMASK));						\
-		__r;							\
-	})
-
-#define FD_ZERO(set) do {						\
-		fd_set *__set = (set);					\
-		int __idx;						\
-		int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\
-		for (__idx = 0; __idx < __size; __idx++)		\
-			__set->fds[__idx] = 0;				\
-	} while (0)
-
 /* for getdents64() */
 struct linux_dirent64 {
 	uint64_t       d_ino;
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
index 7405fa2b89ba..bb5e80f3f05d 100644
--- a/tools/include/nolibc/unistd.h
+++ b/tools/include/nolibc/unistd.h
@@ -54,7 +54,7 @@ int msleep(unsigned int msecs)
 {
 	struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
 
-	if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+	if (sys_select(0, NULL, NULL, NULL, &my_timeval) < 0)
 		return (my_timeval.tv_sec * 1000) +
 			(my_timeval.tv_usec / 1000) +
 			!!(my_timeval.tv_usec % 1000);
@@ -67,7 +67,7 @@ unsigned int sleep(unsigned int seconds)
 {
 	struct timeval my_timeval = { seconds, 0 };
 
-	if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+	if (sys_select(0, NULL, NULL, NULL, &my_timeval) < 0)
 		return my_timeval.tv_sec + !!my_timeval.tv_usec;
 	else
 		return 0;
@@ -78,7 +78,7 @@ int usleep(unsigned int usecs)
 {
 	struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 };
 
-	return sys_select(0, 0, 0, 0, &my_timeval);
+	return sys_select(0, NULL, NULL, NULL, &my_timeval);
 }
 
 static __attribute__((unused))
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index e63a71d3c607..3cd5cf15e3c9 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -597,35 +597,66 @@ struct drm_set_version {
 	int drm_dd_minor;
 };
 
-/* DRM_IOCTL_GEM_CLOSE ioctl argument type */
+/**
+ * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl.
+ * @handle: Handle of the object to be closed.
+ * @pad: Padding.
+ *
+ * Releases the handle to an mm object.
+ */
 struct drm_gem_close {
-	/** Handle of the object to be closed. */
 	__u32 handle;
 	__u32 pad;
 };
 
-/* DRM_IOCTL_GEM_FLINK ioctl argument type */
+/**
+ * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl.
+ * @handle: Handle for the object being named.
+ * @name: Returned global name.
+ *
+ * Create a global name for an object, returning the name.
+ *
+ * Note that the name does not hold a reference; when the object
+ * is freed, the name goes away.
+ */
 struct drm_gem_flink {
-	/** Handle for the object being named */
 	__u32 handle;
-
-	/** Returned global name */
 	__u32 name;
 };
 
-/* DRM_IOCTL_GEM_OPEN ioctl argument type */
+/**
+ * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl.
+ * @name: Name of object being opened.
+ * @handle: Returned handle for the object.
+ * @size: Returned size of the object
+ *
+ * Open an object using the global name, returning a handle and the size.
+ *
+ * This handle (of course) holds a reference to the object, so the object
+ * will not go away until the handle is deleted.
+ */
 struct drm_gem_open {
-	/** Name of object being opened */
 	__u32 name;
-
-	/** Returned handle for the object */
 	__u32 handle;
-
-	/** Returned size of the object */
 	__u64 size;
 };
 
 /**
+ * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl.
+ * @handle: The handle of a gem object.
+ * @new_handle: An available gem handle.
+ *
+ * This ioctl changes the handle of a GEM object to the specified one.
+ * The new handle must be unused. On success the old handle is closed
+ * and all further IOCTL should refer to the new handle only.
+ * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle.
+ */
+struct drm_gem_change_handle {
+	__u32 handle;
+	__u32 new_handle;
+};
+
+/**
  * DRM_CAP_DUMB_BUFFER
  *
  * If set to 1, the driver supports creating dumb buffers via the
@@ -1309,6 +1340,14 @@ extern "C" {
  */
 #define DRM_IOCTL_SET_CLIENT_NAME	DRM_IOWR(0xD1, struct drm_set_client_name)
 
+/**
+ * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle
+ *
+ * Some applications (notably CRIU) need objects to have specific gem handles.
+ * This ioctl changes the object at one gem handle to use a new gem handle.
+ */
+#define DRM_IOCTL_GEM_CHANGE_HANDLE    DRM_IOWR(0xD2, struct drm_gem_change_handle)
+
 /*
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6829936d33f5..be7d8e060e10 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1026,6 +1026,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_USER_RINGBUF,
 	BPF_MAP_TYPE_CGRP_STORAGE,
 	BPF_MAP_TYPE_ARENA,
+	BPF_MAP_TYPE_INSN_ARRAY,
 	__MAX_BPF_MAP_TYPE
 };
 
@@ -1430,6 +1431,9 @@ enum {
 
 /* Do not translate kernel bpf_arena pointers to user pointers */
 	BPF_F_NO_USER_CONV	= (1U << 18),
+
+/* Enable BPF ringbuf overwrite mode */
+	BPF_F_RB_OVERWRITE	= (1U << 19),
 };
 
 /* Flags for BPF_PROG_QUERY. */
@@ -5618,7 +5622,7 @@ union bpf_attr {
  *	Return
  *		*sk* if casting is valid, or **NULL** otherwise.
  *
- * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr)
+ * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr)
  *	Description
  *		Get a dynptr to local memory *data*.
  *
@@ -5661,7 +5665,7 @@ union bpf_attr {
  *	Return
  *		Nothing. Always succeeds.
  *
- * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
+ * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags)
  *	Description
  *		Read *len* bytes from *src* into *dst*, starting from *offset*
  *		into *src*.
@@ -5671,7 +5675,7 @@ union bpf_attr {
  *		of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
  *		*flags* is not 0.
  *
- * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
+ * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags)
  *	Description
  *		Write *len* bytes from *src* into *dst*, starting from *offset*
  *		into *dst*.
@@ -5692,7 +5696,7 @@ union bpf_attr {
  *		is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
  *		other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
  *
- * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
+ * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len)
  *	Description
  *		Get a pointer to the underlying dynptr data.
  *
@@ -6231,6 +6235,7 @@ enum {
 	BPF_RB_RING_SIZE = 1,
 	BPF_RB_CONS_POS = 2,
 	BPF_RB_PROD_POS = 3,
+	BPF_RB_OVERWRITE_POS = 4,
 };
 
 /* BPF ring buffer constants */
@@ -7200,6 +7205,7 @@ enum {
 	TCP_BPF_SYN_MAC         = 1007, /* Copy the MAC, IP[46], and TCP header */
 	TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
 	SK_BPF_CB_FLAGS		= 1009, /* Get or set sock ops flags in socket */
+	SK_BPF_BYPASS_PROT_MEM	= 1010, /* Get or Set sk->sk_bypass_prot_mem */
 };
 
 enum {
@@ -7645,4 +7651,24 @@ enum bpf_kfunc_flags {
 	BPF_F_PAD_ZEROS = (1ULL << 0),
 };
 
+/*
+ * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type.
+ *
+ * Before the map is used the orig_off field should point to an
+ * instruction inside the program being loaded. The other fields
+ * must be set to 0.
+ *
+ * After the program is loaded, the xlated_off will be adjusted
+ * by the verifier to point to the index of the original instruction
+ * in the xlated program. If the instruction is deleted, it will
+ * be set to (u32)-1. The jitted_off will be set to the corresponding
+ * offset in the jitted image of the program.
+ */
+struct bpf_insn_array_value {
+	__u32 orig_off;
+	__u32 xlated_off;
+	__u32 jitted_off;
+	__u32 :32;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index f0f0d49d2544..52f6000ab020 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -962,6 +962,7 @@ struct kvm_enable_cap {
 #define KVM_CAP_ARM_EL2_E2H0 241
 #define KVM_CAP_RISCV_MP_STATE_RESET 242
 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
+#define KVM_CAP_GUEST_MEMFD_FLAGS 244
 
 struct kvm_irq_routing_irqchip {
 	__u32 irqchip;
@@ -1598,6 +1599,8 @@ struct kvm_memory_attributes {
 #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
 
 #define KVM_CREATE_GUEST_MEMFD	_IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
+#define GUEST_MEMFD_FLAG_MMAP		(1ULL << 0)
+#define GUEST_MEMFD_FLAG_INIT_SHARED	(1ULL << 1)
 
 struct kvm_create_guest_memfd {
 	__u64 size;
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 48eb49aa03d4..e0b579a1df4f 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -2,6 +2,7 @@
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/netdev.yaml */
 /* YNL-GEN uapi header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
 
 #ifndef _UAPI_LINUX_NETDEV_H
 #define _UAPI_LINUX_NETDEV_H
@@ -80,6 +81,7 @@ enum netdev_qstats_scope {
 enum netdev_napi_threaded {
 	NETDEV_NAPI_THREADED_DISABLED,
 	NETDEV_NAPI_THREADED_ENABLED,
+	NETDEV_NAPI_THREADED_BUSY_POLL,
 };
 
 enum {
diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h
index 33c9b578b3b2..a25e38d1c874 100644
--- a/tools/include/uapi/linux/nsfs.h
+++ b/tools/include/uapi/linux/nsfs.h
@@ -53,6 +53,76 @@ enum init_ns_ino {
 	TIME_NS_INIT_INO	= 0xEFFFFFFAU,
 	NET_NS_INIT_INO		= 0xEFFFFFF9U,
 	MNT_NS_INIT_INO		= 0xEFFFFFF8U,
+#ifdef __KERNEL__
+	MNT_NS_ANON_INO		= 0xEFFFFFF7U,
+#endif
 };
 
+struct nsfs_file_handle {
+	__u64 ns_id;
+	__u32 ns_type;
+	__u32 ns_inum;
+};
+
+#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
+#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
+
+enum init_ns_id {
+	IPC_NS_INIT_ID		= 1ULL,
+	UTS_NS_INIT_ID		= 2ULL,
+	USER_NS_INIT_ID		= 3ULL,
+	PID_NS_INIT_ID		= 4ULL,
+	CGROUP_NS_INIT_ID	= 5ULL,
+	TIME_NS_INIT_ID		= 6ULL,
+	NET_NS_INIT_ID		= 7ULL,
+	MNT_NS_INIT_ID		= 8ULL,
+#ifdef __KERNEL__
+	NS_LAST_INIT_ID		= MNT_NS_INIT_ID,
+#endif
+};
+
+enum ns_type {
+	TIME_NS    = (1ULL << 7),  /* CLONE_NEWTIME */
+	MNT_NS     = (1ULL << 17), /* CLONE_NEWNS */
+	CGROUP_NS  = (1ULL << 25), /* CLONE_NEWCGROUP */
+	UTS_NS     = (1ULL << 26), /* CLONE_NEWUTS */
+	IPC_NS     = (1ULL << 27), /* CLONE_NEWIPC */
+	USER_NS    = (1ULL << 28), /* CLONE_NEWUSER */
+	PID_NS     = (1ULL << 29), /* CLONE_NEWPID */
+	NET_NS     = (1ULL << 30), /* CLONE_NEWNET */
+};
+
+/**
+ * struct ns_id_req - namespace ID request structure
+ * @size: size of this structure
+ * @spare: reserved for future use
+ * @filter: filter mask
+ * @ns_id: last namespace id
+ * @user_ns_id: owning user namespace ID
+ *
+ * Structure for passing namespace ID and miscellaneous parameters to
+ * statns(2) and listns(2).
+ *
+ * For statns(2) @param represents the request mask.
+ * For listns(2) @param represents the last listed mount id (or zero).
+ */
+struct ns_id_req {
+	__u32 size;
+	__u32 spare;
+	__u64 ns_id;
+	struct /* listns */ {
+		__u32 ns_type;
+		__u32 spare2;
+		__u64 user_ns_id;
+	};
+};
+
+/*
+ * Special @user_ns_id value that can be passed to listns()
+ */
+#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */
+
+/* List of all ns_id_req versions. */
+#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */
+
 #endif /* __LINUX_NSFS_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 78a362b80027..d292f96bc06f 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -463,7 +463,9 @@ struct perf_event_attr {
 				inherit_thread :  1, /* children only inherit if cloned with CLONE_THREAD */
 				remove_on_exec :  1, /* event is removed from task on exec */
 				sigtrap        :  1, /* send synchronous SIGTRAP on event */
-				__reserved_1   : 26;
+				defer_callchain:  1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records */
+				defer_output   :  1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records */
+				__reserved_1   : 24;
 
 	union {
 		__u32		wakeup_events;	  /* wake up every n events */
@@ -1239,6 +1241,22 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_AUX_OUTPUT_HW_ID		= 21,
 
+	/*
+	 * This user callchain capture was deferred until shortly before
+	 * returning to user space.  Previous samples would have kernel
+	 * callchains only and they need to be stitched with this to make full
+	 * callchains.
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				cookie;
+	 *	u64				nr;
+	 *	u64				ips[nr];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_CALLCHAIN_DEFERRED		= 22,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
@@ -1269,6 +1287,7 @@ enum perf_callchain_context {
 	PERF_CONTEXT_HV				= (__u64)-32,
 	PERF_CONTEXT_KERNEL			= (__u64)-128,
 	PERF_CONTEXT_USER			= (__u64)-512,
+	PERF_CONTEXT_USER_DEFERRED		= (__u64)-640,
 
 	PERF_CONTEXT_GUEST			= (__u64)-2048,
 	PERF_CONTEXT_GUEST_KERNEL		= (__u64)-2176,
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 339b19797237..b66f5fbfbbb2 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -154,7 +154,7 @@ int bump_rlimit_memlock(void)
 
 	memlock_bumped = true;
 
-	/* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */
+	/* zero memlock_rlim disables auto-bumping RLIMIT_MEMLOCK */
 	if (memlock_rlim == 0)
 		return 0;
 
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 80c028540656..d4e4e388e625 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -315,20 +315,20 @@ enum libbpf_tristate {
 			  ___param, sizeof(___param));		\
 })
 
-extern int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args,
-			      __u32 len__sz, void *aux__prog) __weak __ksym;
-
-#define bpf_stream_printk(stream_id, fmt, args...)				\
-({										\
-	static const char ___fmt[] = fmt;					\
-	unsigned long long ___param[___bpf_narg(args)];				\
-										\
-	_Pragma("GCC diagnostic push")						\
-	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")			\
-	___bpf_fill(___param, args);						\
-	_Pragma("GCC diagnostic pop")						\
-										\
-	bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param), NULL);\
+extern int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args,
+				   __u32 len__sz, void *aux__prog) __weak __ksym;
+
+#define bpf_stream_printk(stream_id, fmt, args...)					\
+({											\
+	static const char ___fmt[] = fmt;						\
+	unsigned long long ___param[___bpf_narg(args)];					\
+											\
+	_Pragma("GCC diagnostic push")							\
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")				\
+	___bpf_fill(___param, args);							\
+	_Pragma("GCC diagnostic pop")							\
+											\
+	bpf_stream_vprintk_impl(stream_id, ___fmt, ___param, sizeof(___param), NULL);	\
 })
 
 /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index a8f6cd4841b0..dbe32a5d02cd 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -311,7 +311,7 @@ struct pt_regs___arm64 {
 #define __PT_RET_REG regs[31]
 #define __PT_FP_REG __unsupported__
 #define __PT_RC_REG gpr[3]
-#define __PT_SP_REG sp
+#define __PT_SP_REG gpr[1]
 #define __PT_IP_REG nip
 
 #elif defined(bpf_target_sparc)
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 18907f0fcf9f..84a4b0abc8be 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1061,7 +1061,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
 	if (base_btf) {
 		btf->base_btf = base_btf;
 		btf->start_id = btf__type_cnt(base_btf);
-		btf->start_str_off = base_btf->hdr->str_len;
+		btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
 	}
 
 	if (is_mmap) {
@@ -3901,6 +3901,20 @@ err_out:
 	return err;
 }
 
+/*
+ * Calculate type signature hash of TYPEDEF, ignoring referenced type IDs,
+ * as referenced type IDs equivalence is established separately during type
+ * graph equivalence check algorithm.
+ */
+static long btf_hash_typedef(struct btf_type *t)
+{
+	long h;
+
+	h = hash_combine(0, t->name_off);
+	h = hash_combine(h, t->info);
+	return h;
+}
+
 static long btf_hash_common(struct btf_type *t)
 {
 	long h;
@@ -3918,6 +3932,13 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
 	       t1->size == t2->size;
 }
 
+/* Check structural compatibility of two TYPEDEF. */
+static bool btf_equal_typedef(struct btf_type *t1, struct btf_type *t2)
+{
+	return t1->name_off == t2->name_off &&
+	       t1->info == t2->info;
+}
+
 /* Calculate type signature hash of INT or TAG. */
 static long btf_hash_int_decl_tag(struct btf_type *t)
 {
@@ -4844,13 +4865,30 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
 	}
 }
 
+static inline long btf_hash_by_kind(struct btf_type *t, __u16 kind)
+{
+	if (kind == BTF_KIND_TYPEDEF)
+		return btf_hash_typedef(t);
+	else
+		return btf_hash_struct(t);
+}
+
+static inline bool btf_equal_by_kind(struct btf_type *t1, struct btf_type *t2, __u16 kind)
+{
+	if (kind == BTF_KIND_TYPEDEF)
+		return btf_equal_typedef(t1, t2);
+	else
+		return btf_shallow_equal_struct(t1, t2);
+}
+
 /*
- * Deduplicate struct/union types.
+ * Deduplicate struct/union and typedef types.
  *
  * For each struct/union type its type signature hash is calculated, taking
  * into account type's name, size, number, order and names of fields, but
  * ignoring type ID's referenced from fields, because they might not be deduped
- * completely until after reference types deduplication phase. This type hash
+ * completely until after reference types deduplication phase. For each typedef
+ * type, the hash is computed based on the type's name and info. This type hash
  * is used to iterate over all potential canonical types, sharing same hash.
  * For each canonical candidate we check whether type graphs that they form
  * (through referenced types in fields and so on) are equivalent using algorithm
@@ -4882,18 +4920,20 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 	t = btf_type_by_id(d->btf, type_id);
 	kind = btf_kind(t);
 
-	if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
+	if (kind != BTF_KIND_STRUCT &&
+		kind != BTF_KIND_UNION &&
+		kind != BTF_KIND_TYPEDEF)
 		return 0;
 
-	h = btf_hash_struct(t);
+	h = btf_hash_by_kind(t, kind);
 	for_each_dedup_cand(d, hash_entry, h) {
 		__u32 cand_id = hash_entry->value;
 		int eq;
 
 		/*
 		 * Even though btf_dedup_is_equiv() checks for
-		 * btf_shallow_equal_struct() internally when checking two
-		 * structs (unions) for equivalence, we need to guard here
+		 * btf_equal_by_kind() internally when checking two
+		 * structs (unions) or typedefs for equivalence, we need to guard here
 		 * from picking matching FWD type as a dedup candidate.
 		 * This can happen due to hash collision. In such case just
 		 * relying on btf_dedup_is_equiv() would lead to potentially
@@ -4901,7 +4941,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 		 * FWD and compatible STRUCT/UNION are considered equivalent.
 		 */
 		cand_type = btf_type_by_id(d->btf, cand_id);
-		if (!btf_shallow_equal_struct(t, cand_type))
+		if (!btf_equal_by_kind(t, cand_type, kind))
 			continue;
 
 		btf_dedup_clear_hypot_map(d);
@@ -4939,18 +4979,18 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
 /*
  * Deduplicate reference type.
  *
- * Once all primitive and struct/union types got deduplicated, we can easily
+ * Once all primitive, struct/union and typedef types got deduplicated, we can easily
  * deduplicate all other (reference) BTF types. This is done in two steps:
  *
  * 1. Resolve all referenced type IDs into their canonical type IDs. This
- * resolution can be done either immediately for primitive or struct/union types
- * (because they were deduped in previous two phases) or recursively for
+ * resolution can be done either immediately for primitive, struct/union, and typedef
+ * types (because they were deduped in previous two phases) or recursively for
  * reference types. Recursion will always terminate at either primitive or
- * struct/union type, at which point we can "unwind" chain of reference types
- * one by one. There is no danger of encountering cycles because in C type
- * system the only way to form type cycle is through struct/union, so any chain
- * of reference types, even those taking part in a type cycle, will inevitably
- * reach struct/union at some point.
+ * struct/union or typedef types, at which point we can "unwind" chain of reference
+ * types one by one. There is no danger of encountering cycles in C, as the only way to
+ * form a type cycle is through struct or union types. Go can form such cycles through
+ * typedef. Thus, any chain of reference types, even those taking part in a type cycle,
+ * will inevitably reach a struct/union or typedef type at some point.
  *
  * 2. Once all referenced type IDs are resolved into canonical ones, BTF type
  * becomes "stable", in the sense that no further deduplication will cause
@@ -4982,7 +5022,6 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 	case BTF_KIND_VOLATILE:
 	case BTF_KIND_RESTRICT:
 	case BTF_KIND_PTR:
-	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
 	case BTF_KIND_TYPE_TAG:
 		ref_type_id = btf_dedup_ref_type(d, t->type);
@@ -5818,7 +5857,7 @@ void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
 {
 	btf->base_btf = (struct btf *)base_btf;
 	btf->start_id = btf__type_cnt(base_btf);
-	btf->start_str_off = base_btf->hdr->str_len;
+	btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
 }
 
 int btf__relocate(struct btf *btf, const struct btf *base_btf)
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index ccfd905f03df..cc01494d6210 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -94,6 +94,7 @@ LIBBPF_API struct btf *btf__new_empty(void);
  * @brief **btf__new_empty_split()** creates an unpopulated BTF object from an
  * ELF BTF section except with a base BTF on top of which split BTF should be
  * based
+ * @param base_btf base BTF object
  * @return new BTF object instance which has to be eventually freed with
  * **btf__free()**
  *
@@ -115,6 +116,10 @@ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
  * When that split BTF is loaded against a (possibly changed) base, this
  * distilled base BTF will help update references to that (possibly changed)
  * base BTF.
+ * @param src_btf source split BTF object
+ * @param new_base_btf pointer to where the new base BTF object pointer will be stored
+ * @param new_split_btf pointer to where the new split BTF object pointer will be stored
+ * @return 0 on success; negative error code, otherwise
  *
  * Both the new split and its associated new base BTF must be freed by
  * the caller.
@@ -264,6 +269,9 @@ LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
  * to base BTF kinds, and verify those references are compatible with
  * *base_btf*; if they are, *btf* is adjusted such that is re-parented to
  * *base_btf* and type ids and strings are adjusted to accommodate this.
+ * @param btf split BTF object to relocate
+ * @param base_btf base BTF object
+ * @return 0 on success; negative error code, otherwise
  *
  * If successful, 0 is returned and **btf** now has **base_btf** as its
  * base.
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index dd3b2f57082d..3dc8a8078815 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -190,6 +190,7 @@ static const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_USER_RINGBUF]             = "user_ringbuf",
 	[BPF_MAP_TYPE_CGRP_STORAGE]		= "cgrp_storage",
 	[BPF_MAP_TYPE_ARENA]			= "arena",
+	[BPF_MAP_TYPE_INSN_ARRAY]		= "insn_array",
 };
 
 static const char * const prog_type_name[] = {
@@ -369,6 +370,7 @@ enum reloc_type {
 	RELO_EXTERN_CALL,
 	RELO_SUBPROG_ADDR,
 	RELO_CORE,
+	RELO_INSN_ARRAY,
 };
 
 struct reloc_desc {
@@ -379,7 +381,16 @@ struct reloc_desc {
 		struct {
 			int map_idx;
 			int sym_off;
-			int ext_idx;
+			/*
+			 * The following two fields can be unionized, as the
+			 * ext_idx field is used for extern symbols, and the
+			 * sym_size is used for jump tables, which are never
+			 * extern
+			 */
+			union {
+				int ext_idx;
+				int sym_size;
+			};
 		};
 	};
 };
@@ -421,6 +432,11 @@ struct bpf_sec_def {
 	libbpf_prog_attach_fn_t prog_attach_fn;
 };
 
+struct bpf_light_subprog {
+	__u32 sec_insn_off;
+	__u32 sub_insn_off;
+};
+
 /*
  * bpf_prog should be a better name but it has been used in
  * linux/filter.h.
@@ -494,6 +510,9 @@ struct bpf_program {
 	__u32 line_info_cnt;
 	__u32 prog_flags;
 	__u8  hash[SHA256_DIGEST_LENGTH];
+
+	struct bpf_light_subprog *subprogs;
+	__u32 subprog_cnt;
 };
 
 struct bpf_struct_ops {
@@ -667,6 +686,7 @@ struct elf_state {
 	int symbols_shndx;
 	bool has_st_ops;
 	int arena_data_shndx;
+	int jumptables_data_shndx;
 };
 
 struct usdt_manager;
@@ -738,6 +758,16 @@ struct bpf_object {
 	void *arena_data;
 	size_t arena_data_sz;
 
+	void *jumptables_data;
+	size_t jumptables_data_sz;
+
+	struct {
+		struct bpf_program *prog;
+		int sym_off;
+		int fd;
+	} *jumptable_maps;
+	size_t jumptable_map_cnt;
+
 	struct kern_feature_cache *feat_cache;
 	char *token_path;
 	int token_fd;
@@ -764,6 +794,7 @@ void bpf_program__unload(struct bpf_program *prog)
 
 	zfree(&prog->func_info);
 	zfree(&prog->line_info);
+	zfree(&prog->subprogs);
 }
 
 static void bpf_program__exit(struct bpf_program *prog)
@@ -2996,7 +3027,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 
 	scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
 	data = elf_sec_data(obj, scn);
-	if (!scn || !data) {
+	if (!data) {
 		pr_warn("elf: failed to get %s map definitions for %s\n",
 			MAPS_ELF_SEC, obj->path);
 		return -EINVAL;
@@ -3942,6 +3973,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 			} else if (strcmp(name, ARENA_SEC) == 0) {
 				obj->efile.arena_data = data;
 				obj->efile.arena_data_shndx = idx;
+			} else if (strcmp(name, JUMPTABLES_SEC) == 0) {
+				obj->jumptables_data = malloc(data->d_size);
+				if (!obj->jumptables_data)
+					return -ENOMEM;
+				memcpy(obj->jumptables_data, data->d_buf, data->d_size);
+				obj->jumptables_data_sz = data->d_size;
+				obj->efile.jumptables_data_shndx = idx;
 			} else {
 				pr_info("elf: skipping unrecognized data section(%d) %s\n",
 					idx, name);
@@ -4634,6 +4672,16 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 		return 0;
 	}
 
+	/* jump table data relocation */
+	if (shdr_idx == obj->efile.jumptables_data_shndx) {
+		reloc_desc->type = RELO_INSN_ARRAY;
+		reloc_desc->insn_idx = insn_idx;
+		reloc_desc->map_idx = -1;
+		reloc_desc->sym_off = sym->st_value;
+		reloc_desc->sym_size = sym->st_size;
+		return 0;
+	}
+
 	/* generic map reference relocation */
 	if (type == LIBBPF_MAP_UNSPEC) {
 		if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
@@ -6144,6 +6192,157 @@ static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
 	insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
 }
 
+static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off)
+{
+	size_t i;
+
+	for (i = 0; i < obj->jumptable_map_cnt; i++) {
+		/*
+		 * The same offset may be used by two different programs (as
+		 * jump tables can be identical). However, a separate map
+		 * should be created for each program.
+		 */
+		if (obj->jumptable_maps[i].sym_off == sym_off &&
+		    obj->jumptable_maps[i].prog == prog)
+			return obj->jumptable_maps[i].fd;
+	}
+
+	return -ENOENT;
+}
+
+static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off, int map_fd)
+{
+	size_t cnt = obj->jumptable_map_cnt;
+	size_t size = sizeof(obj->jumptable_maps[0]);
+	void *tmp;
+
+	tmp = libbpf_reallocarray(obj->jumptable_maps, cnt + 1, size);
+	if (!tmp)
+		return -ENOMEM;
+
+	obj->jumptable_maps = tmp;
+	obj->jumptable_maps[cnt].prog = prog;
+	obj->jumptable_maps[cnt].sym_off = sym_off;
+	obj->jumptable_maps[cnt].fd = map_fd;
+	obj->jumptable_map_cnt++;
+
+	return 0;
+}
+
+static int find_subprog_idx(struct bpf_program *prog, int insn_idx)
+{
+	int i;
+
+	for (i = prog->subprog_cnt - 1; i >= 0; i--) {
+		if (insn_idx >= prog->subprogs[i].sub_insn_off)
+			return i;
+	}
+
+	return -1;
+}
+
+static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo)
+{
+	const __u32 jt_entry_size = 8;
+	int sym_off = relo->sym_off;
+	int jt_size = relo->sym_size;
+	__u32 max_entries = jt_size / jt_entry_size;
+	__u32 value_size = sizeof(struct bpf_insn_array_value);
+	struct bpf_insn_array_value val = {};
+	int subprog_idx;
+	int map_fd, err;
+	__u64 insn_off;
+	__u64 *jt;
+	__u32 i;
+
+	map_fd = find_jt_map(obj, prog, sym_off);
+	if (map_fd >= 0)
+		return map_fd;
+
+	if (sym_off % jt_entry_size) {
+		pr_warn("map '.jumptables': jumptable start %d should be multiple of %u\n",
+			sym_off, jt_entry_size);
+		return -EINVAL;
+	}
+
+	if (jt_size % jt_entry_size) {
+		pr_warn("map '.jumptables': jumptable size %d should be multiple of %u\n",
+			jt_size, jt_entry_size);
+		return -EINVAL;
+	}
+
+	map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables",
+				4, value_size, max_entries, NULL);
+	if (map_fd < 0)
+		return map_fd;
+
+	if (!obj->jumptables_data) {
+		pr_warn("map '.jumptables': ELF file is missing jump table data\n");
+		err = -EINVAL;
+		goto err_close;
+	}
+	if (sym_off + jt_size > obj->jumptables_data_sz) {
+		pr_warn("map '.jumptables': jumptables_data size is %zd, trying to access %d\n",
+			obj->jumptables_data_sz, sym_off + jt_size);
+		err = -EINVAL;
+		goto err_close;
+	}
+
+	subprog_idx = -1; /* main program */
+	if (relo->insn_idx < 0 || relo->insn_idx >= prog->insns_cnt) {
+		pr_warn("map '.jumptables': invalid instruction index %d\n", relo->insn_idx);
+		err = -EINVAL;
+		goto err_close;
+	}
+	if (prog->subprogs)
+		subprog_idx = find_subprog_idx(prog, relo->insn_idx);
+
+	jt = (__u64 *)(obj->jumptables_data + sym_off);
+	for (i = 0; i < max_entries; i++) {
+		/*
+		 * The offset should be made to be relative to the beginning of
+		 * the main function, not the subfunction.
+		 */
+		insn_off = jt[i]/sizeof(struct bpf_insn);
+		if (subprog_idx >= 0) {
+			insn_off -= prog->subprogs[subprog_idx].sec_insn_off;
+			insn_off += prog->subprogs[subprog_idx].sub_insn_off;
+		} else {
+			insn_off -= prog->sec_insn_off;
+		}
+
+		/*
+		 * LLVM-generated jump tables contain u64 records, but the
+		 * resulting values are expected to fit in u32.
+		 */
+		if (insn_off > UINT32_MAX) {
+			pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %d\n",
+				(long long)jt[i], sym_off + i * jt_entry_size);
+			err = -EINVAL;
+			goto err_close;
+		}
+
+		val.orig_off = insn_off;
+		err = bpf_map_update_elem(map_fd, &i, &val, 0);
+		if (err)
+			goto err_close;
+	}
+
+	err = bpf_map_freeze(map_fd);
+	if (err)
+		goto err_close;
+
+	err = add_jt_map(obj, prog, sym_off, map_fd);
+	if (err)
+		goto err_close;
+
+	return map_fd;
+
+err_close:
+	close(map_fd);
+	return err;
+}
+
 /* Relocate data references within program code:
  *  - map references;
  *  - global variable references;
@@ -6235,6 +6434,20 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 		case RELO_CORE:
 			/* will be handled by bpf_program_record_relos() */
 			break;
+		case RELO_INSN_ARRAY: {
+			int map_fd;
+
+			map_fd = create_jt_map(obj, prog, relo);
+			if (map_fd < 0) {
+				pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n",
+					prog->name, i, relo->sym_off);
+				return map_fd;
+			}
+			insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+			insn->imm = map_fd;
+			insn->off = 0;
+		}
+			break;
 		default:
 			pr_warn("prog '%s': relo #%d: bad relo type %d\n",
 				prog->name, i, relo->type);
@@ -6432,36 +6645,62 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra
 	return 0;
 }
 
+static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog)
+{
+	size_t size = sizeof(main_prog->subprogs[0]);
+	int cnt = main_prog->subprog_cnt;
+	void *tmp;
+
+	tmp = libbpf_reallocarray(main_prog->subprogs, cnt + 1, size);
+	if (!tmp)
+		return -ENOMEM;
+
+	main_prog->subprogs = tmp;
+	main_prog->subprogs[cnt].sec_insn_off = subprog->sec_insn_off;
+	main_prog->subprogs[cnt].sub_insn_off = subprog->sub_insn_off;
+	main_prog->subprog_cnt++;
+
+	return 0;
+}
+
 static int
 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
 				struct bpf_program *subprog)
 {
-       struct bpf_insn *insns;
-       size_t new_cnt;
-       int err;
+	struct bpf_insn *insns;
+	size_t new_cnt;
+	int err;
 
-       subprog->sub_insn_off = main_prog->insns_cnt;
+	subprog->sub_insn_off = main_prog->insns_cnt;
 
-       new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
-       insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
-       if (!insns) {
-               pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
-               return -ENOMEM;
-       }
-       main_prog->insns = insns;
-       main_prog->insns_cnt = new_cnt;
+	new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+	insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+	if (!insns) {
+		pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+		return -ENOMEM;
+	}
+	main_prog->insns = insns;
+	main_prog->insns_cnt = new_cnt;
 
-       memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
-              subprog->insns_cnt * sizeof(*insns));
+	memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+	       subprog->insns_cnt * sizeof(*insns));
 
-       pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
-                main_prog->name, subprog->insns_cnt, subprog->name);
+	pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+		 main_prog->name, subprog->insns_cnt, subprog->name);
 
-       /* The subprog insns are now appended. Append its relos too. */
-       err = append_subprog_relos(main_prog, subprog);
-       if (err)
-               return err;
-       return 0;
+	/* The subprog insns are now appended. Append its relos too. */
+	err = append_subprog_relos(main_prog, subprog);
+	if (err)
+		return err;
+
+	err = save_subprog_offsets(main_prog, subprog);
+	if (err) {
+		pr_warn("prog '%s': failed to add subprog offsets: %s\n",
+			main_prog->name, errstr(err));
+		return err;
+	}
+
+	return 0;
 }
 
 static int
@@ -9228,6 +9467,13 @@ void bpf_object__close(struct bpf_object *obj)
 
 	zfree(&obj->arena_data);
 
+	zfree(&obj->jumptables_data);
+	obj->jumptables_data_sz = 0;
+
+	for (i = 0; i < obj->jumptable_map_cnt; i++)
+		close(obj->jumptable_maps[i].fd);
+	zfree(&obj->jumptable_maps);
+
 	free(obj);
 }
 
@@ -11325,8 +11571,6 @@ static const char *arch_specific_syscall_pfx(void)
 	return "ia32";
 #elif defined(__s390x__)
 	return "s390x";
-#elif defined(__s390__)
-	return "s390";
 #elif defined(__arm__)
 	return "arm";
 #elif defined(__aarch64__)
@@ -12113,8 +12357,6 @@ static const char *arch_specific_lib_paths(void)
 	return "/lib/i386-linux-gnu";
 #elif defined(__s390x__)
 	return "/lib/s390x-linux-gnu";
-#elif defined(__s390__)
-	return "/lib/s390-linux-gnu";
 #elif defined(__arm__) && defined(__SOFTFP__)
 	return "/lib/arm-linux-gnueabi";
 #elif defined(__arm__) && !defined(__SOFTFP__)
@@ -13858,8 +14100,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
 		return libbpf_err(-EINVAL);
 
 	if (attach_prog_fd && !attach_func_name) {
-		/* remember attach_prog_fd and let bpf_program__load() find
-		 * BTF ID during the program load
+		/* Store attach_prog_fd. The BTF ID will be resolved later during
+		 * the normal object/program load phase.
 		 */
 		prog->attach_prog_fd = attach_prog_fd;
 		return 0;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 5118d0a90e24..65e68e964b89 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -448,7 +448,7 @@ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
 
 /**
  * @brief **bpf_program__unpin()** unpins the BPF program from a file
- * in the BPFFS specified by a path. This decrements the programs
+ * in the BPFFS specified by a path. This decrements the program's in-kernel
  * reference count.
  *
  * The file pinning the BPF program can also be unlinked by a different
@@ -481,14 +481,12 @@ LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
 
 /**
  * @brief **bpf_link__unpin()** unpins the BPF link from a file
- * in the BPFFS specified by a path. This decrements the links
- * reference count.
+ * in the BPFFS. This decrements the link's in-kernel reference count.
  *
  * The file pinning the BPF link can also be unlinked by a different
  * process in which case this function will return an error.
  *
- * @param prog BPF program to unpin
- * @param path file path to the pin in a BPF file system
+ * @param link BPF link to unpin
  * @return 0, on success; negative error code, otherwise
  */
 LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
@@ -995,8 +993,13 @@ LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog);
  *   - fentry/fexit/fmod_ret;
  *   - lsm;
  *   - freplace.
- * @param prog BPF program to set the attach type for
- * @param type attach type to set the BPF map to have
+ * @param prog BPF program to configure; must be not yet loaded.
+ * @param attach_prog_fd FD of target BPF program (for freplace/extension).
+ * If >0 and func name omitted, defers BTF ID resolution.
+ * @param attach_func_name Target function name. Used either with
+ * attach_prog_fd to find destination BTF type ID in that BPF program, or
+ * alone (no attach_prog_fd) to resolve kernel (vmlinux/module) BTF ID.
+ * Must be provided if attach_prog_fd is 0.
  * @return error code; or 0 if no error occurred.
  */
 LIBBPF_API int
@@ -1098,6 +1101,7 @@ LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
 /**
  * @brief **bpf_map__set_value_size()** sets map value size.
  * @param map the BPF map instance
+ * @param size the new value size
  * @return 0, on success; negative error, otherwise
  *
  * There is a special case for maps with associated memory-mapped regions, like
@@ -1202,7 +1206,7 @@ LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
  * per-CPU values value size has to be aligned up to closest 8 bytes for
  * alignment reasons, so expected size is: `round_up(value_size, 8)
  * * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
  * @return 0, on success; negative error, otherwise
  *
  * **bpf_map__lookup_elem()** is high-level equivalent of
@@ -1226,7 +1230,7 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map,
  * per-CPU values value size has to be aligned up to closest 8 bytes for
  * alignment reasons, so expected size is: `round_up(value_size, 8)
  * * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
  * @return 0, on success; negative error, otherwise
  *
  * **bpf_map__update_elem()** is high-level equivalent of
@@ -1242,7 +1246,7 @@ LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map,
  * @param map BPF map to delete element from
  * @param key pointer to memory containing bytes of the key
  * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
  * @return 0, on success; negative error, otherwise
  *
  * **bpf_map__delete_elem()** is high-level equivalent of
@@ -1265,7 +1269,7 @@ LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map,
  * per-CPU values value size has to be aligned up to closest 8 bytes for
  * alignment reasons, so expected size is: `round_up(value_size, 8)
  * * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
  * @return 0, on success; negative error, otherwise
  *
  * **bpf_map__lookup_and_delete_elem()** is high-level equivalent of
@@ -1637,6 +1641,7 @@ struct perf_buffer_opts {
  * @param sample_cb function called on each received data record
  * @param lost_cb function called when record loss has occurred
  * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb*
+ * @param opts optional parameters for the perf buffer, can be null
  * @return a new instance of struct perf_buffer on success, NULL on error with
  * *errno* containing an error code
  */
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 35b2527bedec..fc59b21b51b5 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -74,6 +74,8 @@
 #define ELF64_ST_VISIBILITY(o) ((o) & 0x03)
 #endif
 
+#define JUMPTABLES_SEC ".jumptables"
+
 #define BTF_INFO_ENC(kind, kind_flag, vlen) \
 	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 9dfbe7750f56..bccf4bb747e1 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -364,6 +364,10 @@ static int probe_map_create(enum bpf_map_type map_type)
 	case BPF_MAP_TYPE_SOCKHASH:
 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
 		break;
+	case BPF_MAP_TYPE_INSN_ARRAY:
+		key_size	= sizeof(__u32);
+		value_size	= sizeof(struct bpf_insn_array_value);
+		break;
 	case BPF_MAP_TYPE_UNSPEC:
 	default:
 		return -EOPNOTSUPP;
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 56ae77047bc3..f4403e3cf994 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -2025,6 +2025,9 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
 			obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx;
 			return 0;
 		}
+
+		if (strcmp(src_sec->sec_name, JUMPTABLES_SEC) == 0)
+			goto add_sym;
 	}
 
 	if (sym_bind == STB_LOCAL)
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index c174b4086673..d1524f6f54ae 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -1376,8 +1376,6 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
 
 #elif defined(__s390x__)
 
-/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */
-
 static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
 {
 	unsigned int reg;
diff --git a/tools/docs/lib/__init__.py b/tools/lib/python/__init__.py
index e69de29bb2d1..e69de29bb2d1 100644
--- a/tools/docs/lib/__init__.py
+++ b/tools/lib/python/__init__.py
diff --git a/tools/lib/python/abi/__init__.py b/tools/lib/python/abi/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/lib/python/abi/__init__.py
diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py
new file mode 100644
index 000000000000..9b8db70067ef
--- /dev/null
+++ b/tools/lib/python/abi/abi_parser.py
@@ -0,0 +1,628 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+from argparse import Namespace
+import logging
+import os
+import re
+
+from pprint import pformat
+from random import randrange, seed
+
+# Import Python modules
+
+from abi.helpers import AbiDebug, ABI_DIR
+
+
+class AbiParser:
+    """Main class to parse ABI files"""
+
+    TAGS = r"(what|where|date|kernelversion|contact|description|users)"
+    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
+
+    def __init__(self, directory, logger=None,
+                 enable_lineno=False, show_warnings=True, debug=0):
+        """Stores arguments for the class and initialize class vars"""
+
+        self.directory = directory
+        self.enable_lineno = enable_lineno
+        self.show_warnings = show_warnings
+        self.debug = debug
+
+        if not logger:
+            self.log = logging.getLogger("get_abi")
+        else:
+            self.log = logger
+
+        self.data = {}
+        self.what_symbols = {}
+        self.file_refs = {}
+        self.what_refs = {}
+
+        # Ignore files that contain such suffixes
+        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")
+
+        # Regular expressions used on parser
+        self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
+        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
+        self.re_valid = re.compile(self.TAGS)
+        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
+        self.re_whitespace = re.compile(r"^\s+")
+
+        # Regular expressions used on print
+        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
+        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
+        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
+        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
+        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
+        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
+        self.re_xref_node = re.compile(self.XREF)
+
+    def warn(self, fdata, msg, extra=None):
+        """Displays a parse error if warning is enabled"""
+
+        if not self.show_warnings:
+            return
+
+        msg = f"{fdata.fname}:{fdata.ln}: {msg}"
+        if extra:
+            msg += "\n\t\t" + extra
+
+        self.log.warning(msg)
+
+    def add_symbol(self, what, fname, ln=None, xref=None):
+        """Create a reference table describing where each 'what' is located"""
+
+        if what not in self.what_symbols:
+            self.what_symbols[what] = {"file": {}}
+
+        if fname not in self.what_symbols[what]["file"]:
+            self.what_symbols[what]["file"][fname] = []
+
+        if ln and ln not in self.what_symbols[what]["file"][fname]:
+            self.what_symbols[what]["file"][fname].append(ln)
+
+        if xref:
+            self.what_symbols[what]["xref"] = xref
+
+    def _parse_line(self, fdata, line):
+        """Parse a single line of an ABI file"""
+
+        new_what = False
+        new_tag = False
+        content = None
+
+        match = self.re_tag.match(line)
+        if match:
+            new = match.group(1).lower()
+            sep = match.group(2)
+            content = match.group(3)
+
+            match = self.re_valid.search(new)
+            if match:
+                new_tag = match.group(1)
+            else:
+                if fdata.tag == "description":
+                    # New "tag" is actually part of description.
+                    # Don't consider it a tag
+                    new_tag = False
+                elif fdata.tag != "":
+                    self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)
+
+        if new_tag:
+            # "where" is Invalid, but was a common mistake. Warn if found
+            if new_tag == "where":
+                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
+                new_tag = "what"
+
+            if new_tag == "what":
+                fdata.space = None
+
+                if content not in self.what_symbols:
+                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)
+
+                if fdata.tag == "what":
+                    fdata.what.append(content.strip("\n"))
+                else:
+                    if fdata.key:
+                        if "description" not in self.data.get(fdata.key, {}):
+                            self.warn(fdata, f"{fdata.key} doesn't have a description")
+
+                        for w in fdata.what:
+                            self.add_symbol(what=w, fname=fdata.fname,
+                                            ln=fdata.what_ln, xref=fdata.key)
+
+                    fdata.label = content
+                    new_what = True
+
+                    key = "abi_" + content.lower()
+                    fdata.key = self.re_unprintable.sub("_", key).strip("_")
+
+                    # Avoid duplicated keys but using a defined seed, to make
+                    # the namespace identical if there aren't changes at the
+                    # ABI symbols
+                    seed(42)
+
+                    while fdata.key in self.data:
+                        char = randrange(0, 51) + ord("A")
+                        if char > ord("Z"):
+                            char += ord("a") - ord("Z") - 1
+
+                        fdata.key += chr(char)
+
+                    if fdata.key and fdata.key not in self.data:
+                        self.data[fdata.key] = {
+                            "what": [content],
+                            "file": [fdata.file_ref],
+                            "path": fdata.ftype,
+                            "line_no": fdata.ln,
+                        }
+
+                    fdata.what = self.data[fdata.key]["what"]
+
+                self.what_refs[content] = fdata.key
+                fdata.tag = new_tag
+                fdata.what_ln = fdata.ln
+
+                if fdata.nametag["what"]:
+                    t = (content, fdata.key)
+                    if t not in fdata.nametag["symbols"]:
+                        fdata.nametag["symbols"].append(t)
+
+                return
+
+            if fdata.tag and new_tag:
+                fdata.tag = new_tag
+
+                if new_what:
+                    fdata.label = ""
+
+                    if "description" in self.data[fdata.key]:
+                        self.data[fdata.key]["description"] += "\n\n"
+
+                    if fdata.file_ref not in self.data[fdata.key]["file"]:
+                        self.data[fdata.key]["file"].append(fdata.file_ref)
+
+                    if self.debug == AbiDebug.WHAT_PARSING:
+                        self.log.debug("what: %s", fdata.what)
+
+                if not fdata.what:
+                    self.warn(fdata, "'What:' should come first:", line)
+                    return
+
+                if new_tag == "description":
+                    fdata.space = None
+
+                    if content:
+                        sep = sep.replace(":", " ")
+
+                        c = " " * len(new_tag) + sep + content
+                        c = c.expandtabs()
+
+                        match = self.re_start_spc.match(c)
+                        if match:
+                            # Preserve initial spaces for the first line
+                            fdata.space = match.group(1)
+                            content = match.group(2) + "\n"
+
+                self.data[fdata.key][fdata.tag] = content
+
+            return
+
+        # Store any contents before tags at the database
+        if not fdata.tag and "what" in fdata.nametag:
+            fdata.nametag["description"] += line
+            return
+
+        if fdata.tag == "description":
+            content = line.expandtabs()
+
+            if self.re_whitespace.sub("", content) == "":
+                self.data[fdata.key][fdata.tag] += "\n"
+                return
+
+            if fdata.space is None:
+                match = self.re_start_spc.match(content)
+                if match:
+                    # Preserve initial spaces for the first line
+                    fdata.space = match.group(1)
+
+                    content = match.group(2) + "\n"
+            else:
+                if content.startswith(fdata.space):
+                    content = content[len(fdata.space):]
+
+                else:
+                    fdata.space = ""
+
+            if fdata.tag == "what":
+                w = content.strip("\n")
+                if w:
+                    self.data[fdata.key][fdata.tag].append(w)
+            else:
+                self.data[fdata.key][fdata.tag] += content
+            return
+
+        content = line.strip()
+        if fdata.tag:
+            if fdata.tag == "what":
+                w = content.strip("\n")
+                if w:
+                    self.data[fdata.key][fdata.tag].append(w)
+            else:
+                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
+            return
+
+        # Everything else is error
+        if content:
+            self.warn(fdata, "Unexpected content", line)
+
+    def parse_readme(self, nametag, fname):
+        """Parse ABI README file"""
+
+        nametag["what"] = ["Introduction"]
+        nametag["path"] = "README"
+        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
+            for line in fp:
+                match = self.re_tag.match(line)
+                if match:
+                    new = match.group(1).lower()
+
+                    match = self.re_valid.search(new)
+                    if match:
+                        nametag["description"] += "\n:" + line
+                        continue
+
+                nametag["description"] += line
+
+    def parse_file(self, fname, path, basename):
+        """Parse a single ABI file, storing its symbols at self.data"""
+
+        ref = f"abi_file_{path}_{basename}"
+        ref = self.re_unprintable.sub("_", ref).strip("_")
+
+        # Store per-file state into a fresh namespace instance. It is used
+        # by the per-line parser state machine and by the warning function.
+        fdata = Namespace()
+
+        fdata.fname = fname
+        fdata.name = basename
+
+        pos = fname.find(ABI_DIR)
+        if pos > 0:
+            f = fname[pos:]
+        else:
+            f = fname
+
+        fdata.file_ref = (f, ref)
+        self.file_refs[f] = ref
+
+        fdata.ln = 0
+        fdata.what_ln = 0
+        fdata.tag = ""
+        fdata.label = ""
+        fdata.what = []
+        fdata.key = None
+        fdata.xrefs = None
+        fdata.space = None
+        fdata.ftype = path.split("/")[0]
+
+        fdata.nametag = {}
+        fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
+        fdata.nametag["type"] = "File"
+        fdata.nametag["path"] = fdata.ftype
+        fdata.nametag["file"] = [fdata.file_ref]
+        fdata.nametag["line_no"] = 1
+        fdata.nametag["description"] = ""
+        fdata.nametag["symbols"] = []
+
+        self.data[ref] = fdata.nametag
+
+        if self.debug & AbiDebug.WHAT_OPEN:
+            self.log.debug("Opening file %s", fname)
+
+        if basename == "README":
+            self.parse_readme(fdata.nametag, fname)
+            return
+
+        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
+            for line in fp:
+                fdata.ln += 1
+
+                self._parse_line(fdata, line)
+
+            if "description" in fdata.nametag:
+                fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")
+
+            if fdata.key:
+                if "description" not in self.data.get(fdata.key, {}):
+                    self.warn(fdata, f"{fdata.key} doesn't have a description")
+
+                for w in fdata.what:
+                    self.add_symbol(what=w, fname=fname, xref=fdata.key)
+
+    def _parse_abi(self, root=None):
+        """Internal function to parse documentation ABI recursively"""
+
+        if not root:
+            root = self.directory
+
+        with os.scandir(root) as obj:
+            for entry in obj:
+                name = os.path.join(root, entry.name)
+
+                if entry.is_dir():
+                    self._parse_abi(name)
+                    continue
+
+                if not entry.is_file():
+                    continue
+
+                basename = os.path.basename(name)
+
+                if basename.startswith("."):
+                    continue
+
+                if basename.endswith(self.ignore_suffixes):
+                    continue
+
+                path = self.re_abi_dir.sub("", os.path.dirname(name))
+
+                self.parse_file(name, path, basename)
+
+    def parse_abi(self, root=None):
+        """Parse documentation ABI"""
+
+        self._parse_abi(root)
+
+        if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
+            self.log.debug(pformat(self.data))
+
+    def desc_txt(self, desc):
+        """Return description as found inside ABI files"""
+
+        desc = desc.strip(" \t\n")
+
+        return desc + "\n\n"
+
+    def xref(self, fname):
+        """
+        Converts a Documentation/ABI + basename into a ReST cross-reference
+        """
+
+        xref = self.file_refs.get(fname)
+        if not xref:
+            return None
+        else:
+            return xref
+
+    def desc_rst(self, desc):
+        """Enrich ReST output by creating cross-references"""
+
+        # Remove title markups from the description
+        # Having titles inside ABI files will only work if extra
+        # care would be taken in order to strictly follow the same
+        # level order for each markup.
+        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
+        desc = desc.rstrip(" \t\n").lstrip("\n")
+
+        # Python's regex performance for non-compiled expressions is a lot
+        # worse than Perl's, as Perl automatically caches them at their
+        # first usage. Here, we'll need to do the same, as otherwise the
+        # performance penalty would be high
+
+        new_desc = ""
+        for d in desc.split("\n"):
+            if d == "":
+                new_desc += "\n"
+                continue
+
+            # Use cross-references for doc files where needed
+            d = self.re_doc.sub(r":doc:`/\1`", d)
+
+            # Use cross-references for ABI generated docs where needed
+            matches = self.re_abi.findall(d)
+            for m in matches:
+                abi = m[0] + m[1]
+
+                xref = self.file_refs.get(abi)
+                if not xref:
+                    # This may happen if ABI is on a separate directory,
+                    # like parsing ABI testing and symbol is at stable.
+                    # The proper solution is to move this part of the code
+                    # for it to be inside sphinx/kernel_abi.py
+                    self.log.info("Didn't find ABI reference for '%s'", abi)
+                else:
+                    new = self.re_escape.sub(r"\\\1", m[1])
+                    d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)
+
+            # Seek for cross reference symbols like /sys/...
+            # Need to be careful to avoid doing it on a code block
+            if d[0] not in [" ", "\t"]:
+                matches = self.re_xref_node.findall(d)
+                for m in matches:
+                    # Finding ABI here is more complex due to wildcards
+                    xref = self.what_refs.get(m)
+                    if xref:
+                        new = self.re_escape.sub(r"\\\1", m)
+                        d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)
+
+            new_desc += d + "\n"
+
+        return new_desc + "\n\n"
+
+    def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
+            filter_path=None):
+        """Yield ABI documentation as (text, file name, line number) tuples"""
+
+        part = None
+        for key, v in sorted(self.data.items(),
+                             key=lambda x: (x[1].get("type", ""),
+                                            x[1].get("what"))):
+
+            wtype = v.get("type", "Symbol")
+            file_ref = v.get("file")
+            names = v.get("what", [""])
+
+            if wtype == "File":
+                if not show_file:
+                    continue
+            else:
+                if not show_symbols:
+                    continue
+
+            if filter_path:
+                if v.get("path") != filter_path:
+                    continue
+
+            msg = ""
+
+            if wtype != "File":
+                cur_part = names[0]
+                if cur_part.find("/") >= 0:
+                    match = self.re_what.match(cur_part)
+                    if match:
+                        symbol = match.group(1).rstrip("/")
+                        cur_part = "Symbols under " + symbol
+
+                if cur_part and cur_part != part:
+                    part = cur_part
+                    msg += part + "\n"+ "-" * len(part) +"\n\n"
+
+                msg += f".. _{key}:\n\n"
+
+                max_len = 0
+                for i in range(0, len(names)):           # pylint: disable=C0200
+                    names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**"
+
+                    max_len = max(max_len, len(names[i]))
+
+                msg += "+-" + "-" * max_len + "-+\n"
+                for name in names:
+                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
+                    msg += "+-" + "-" * max_len + "-+\n"
+                msg += "\n"
+
+            for ref in file_ref:
+                if wtype == "File":
+                    msg += f".. _{ref[1]}:\n\n"
+                else:
+                    base = os.path.basename(ref[0])
+                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"
+
+            if wtype == "File":
+                msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n"
+
+            desc = v.get("description")
+            if not desc and wtype != "File":
+                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"
+
+            if desc:
+                if output_in_txt:
+                    msg += self.desc_txt(desc)
+                else:
+                    msg += self.desc_rst(desc)
+
+            symbols = v.get("symbols")
+            if symbols:
+                msg += "Has the following ABI:\n\n"
+
+                for w, label in symbols:
+                    # Escape special chars from content
+                    content = self.re_escape.sub(r"\\\1", w)
+
+                    msg += f"- :ref:`{content} <{label}>`\n\n"
+
+            users = v.get("users")
+            if users and users.strip(" \t\n"):
+                users = users.strip("\n").replace('\n', '\n\t')
+                msg += f"Users:\n\t{users}\n\n"
+
+            ln = v.get("line_no", 1)
+
+            yield (msg, file_ref[0][0], ln)
+
+    def check_issues(self):
+        """Warn about ABI symbols that are defined in more than one file"""
+
+        for what, v in self.what_symbols.items():
+            files = v.get("file")
+            if not files:
+                # Should never happen if the parser works properly
+                self.log.warning("%s doesn't have a file associated", what)
+                continue
+
+            if len(files) == 1:
+                continue
+
+            f = []
+            for fname, lines in sorted(files.items()):
+                if not lines:
+                    f.append(f"{fname}")
+                elif len(lines) == 1:
+                    f.append(f"{fname}:{lines[0]}")
+                else:
+                    m = fname + ": lines "
+                    m += ", ".join(str(x) for x in lines)
+                    f.append(m)
+
+            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
+
+    def search_symbols(self, expr):
+        """Print the ABI symbols whose What matches expr, sorted by key"""
+
+        regex = re.compile(expr, re.I)
+
+        found_keys = 0
+        for t in sorted(self.data.items(), key=lambda x: x[0]):
+            v = t[1]
+
+            wtype = v.get("type", "")
+            if wtype == "File":
+                continue
+
+            for what in v.get("what", [""]):
+                if regex.search(what):
+                    found_keys += 1
+
+                    kernelversion = v.get("kernelversion", "").strip(" \t\n")
+                    date = v.get("date", "").strip(" \t\n")
+                    contact = v.get("contact", "").strip(" \t\n")
+                    users = v.get("users", "").strip(" \t\n")
+                    desc = v.get("description", "").strip(" \t\n")
+
+                    files = []
+                    for f in v.get("file", ()):
+                        files.append(f[0])
+
+                    what = str(found_keys) + ". " + what
+                    title_tag = "-" * len(what)
+
+                    print(f"\n{what}\n{title_tag}\n")
+
+                    if kernelversion:
+                        print(f"Kernel version:\t\t{kernelversion}")
+
+                    if date:
+                        print(f"Date:\t\t\t{date}")
+
+                    if contact:
+                        print(f"Contact:\t\t{contact}")
+
+                    if users:
+                        print(f"Users:\t\t\t{users}")
+
+                    print("Defined on file(s):\t" + ", ".join(files))
+
+                    if desc:
+                        desc = desc.strip("\n")
+                        print(f"\n{desc}\n")
+
+        if not found_keys:
+            print(f"Regular expression /{expr}/ not found.")
diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py
new file mode 100644
index 000000000000..d5553206de3c
--- /dev/null
+++ b/tools/lib/python/abi/abi_regex.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+# xxpylint: disable=R0903
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Convert ABI what into regular expressions
+"""
+
+import re
+import sys
+
+from pprint import pformat
+
+from abi.abi_parser import AbiParser
+from abi.helpers import AbiDebug
+
+class AbiRegex(AbiParser):
+    """Extends AbiParser to search ABI nodes with regular expressions"""
+
+    # Escape only ASCII visible characters
+    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"
+    leave_others = "others"
+
+    # Tuples with regular expressions to be compiled and replacement data
+    re_whats = [
+        # Drop escape characters that might exist
+        (re.compile("\\\\"), ""),
+
+        # Temporarily escape dot characters
+        (re.compile(r"\."),  "\xf6"),
+
+        # Temporarily change [0-9]+ type of patterns
+        (re.compile(r"\[0\-9\]\+"),  "\xff"),
+
+        # Temporarily change [\d+-\d+] type of patterns
+        (re.compile(r"\[0\-\d+\]"),  "\xff"),
+        (re.compile(r"\[0:\d+\]"),  "\xff"),
+        (re.compile(r"\[(\d+)\]"),  "\xf4\\\\d+\xf5"),
+
+        # Temporarily change [0-9] type of patterns (\\1, \\2 are group refs)
+        (re.compile(r"\[(\d)\-(\d)\]"),  "\xf4\\1-\\2\xf5"),
+
+        # Handle multiple option patterns
+        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),
+
+        # Handle wildcards
+        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
+        (re.compile(r"/\*/"), "/.*/"),
+        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
+        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
+        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
+        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),
+
+        (re.compile(r"XX+"), "\\\\w\xf7"),
+        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
+        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
+        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),
+
+        # Recover [0-9] type of patterns
+        (re.compile(r"\xf4"), "["),
+        (re.compile(r"\xf5"),  "]"),
+
+        # Remove duplicated spaces
+        (re.compile(r"\s+"), r" "),
+
+        # Special case: drop comparison as in:
+        # What: foo = <something>
+        # (this happens on a few IIO definitions)
+        (re.compile(r"\s*\=.*$"), ""),
+
+        # Escape all other symbols
+        (re.compile(escape_symbols), r"\\\1"),
+        (re.compile(r"\\\\"), r"\\"),
+        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
+        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),
+
+        (re.compile(r"\xff"), r"\\d+"),
+
+        # Special case: IIO ABI which has a parenthesis.
+        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),
+
+        # Simplify regexes with multiple .*
+        (re.compile(r"(?:\.\*){2,}"),  ""),
+
+        # Recover dot characters
+        (re.compile(r"\xf6"), "\\."),
+        # Recover plus characters
+        (re.compile(r"\xf7"), "+"),
+    ]
+    re_has_num = re.compile(r"\\d")
+
+    # Symbol name after escape_chars that are considered a devnode basename
+    re_symbol_name =  re.compile(r"(\w|\\[\.\-\:])+$")
+
+    # List of popular group names to be skipped to minimize regex group size
+    # Use AbiDebug.SUBGROUP_SIZE to detect those
+    skip_names = set(["devices", "hwmon"])
+
+    def regex_append(self, what, new):
+        """
+        Get a search group for a subset of regular expressions.
+
+        As ABI may have thousands of symbols, using a for loop to search all
+        regular expressions is at least O(n^2). When there are wildcards,
+        the complexity increases substantially, eventually becoming exponential.
+
+        To avoid spending too much time on them, use a logic to split
+        them into groups. The smaller the group, the better, as it would
+        mean that searches will be confined to a small number of regular
+        expressions.
+
+        The conversion to a regex subset is tricky, as we need something
+        that can be easily obtained from the sysfs symbol and from the
+        regular expression. So, we need to discard nodes that have
+        wildcards.
+
+        If it can't obtain a subgroup, place the regular expression inside
+        a special group (self.leave_others).
+        """
+
+        search_group = None
+
+        for search_group in reversed(new.split("/")):
+            if not search_group or search_group in self.skip_names:
+                continue
+            if self.re_symbol_name.match(search_group):
+                break
+
+        if not search_group:
+            search_group = self.leave_others
+
+        if self.debug & AbiDebug.SUBGROUP_MAP:
+            self.log.debug("%s: mapped as %s", what, search_group)
+
+        try:
+            if search_group not in self.regex_group:
+                self.regex_group[search_group] = []
+
+            self.regex_group[search_group].append(re.compile(new))
+            if self.search_string:
+                if what.find(self.search_string) >= 0:
+                    print(f"What: {what}")
+        except re.error:
+            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
+                             "           '%s'", what, new)
+
+    def get_regexes(self, what):
+        """
+        Given an ABI devnode, return a list of all regular expressions that
+        may match it, based on the sub-groups created by regex_append()
+        """
+
+        re_list = []
+
+        patches = what.split("/")
+        patches.reverse()
+        patches.append(self.leave_others)
+
+        for search_group in patches:
+            if search_group in self.regex_group:
+                re_list += self.regex_group[search_group]
+
+        return re_list
+
+    def __init__(self, *args, **kwargs):
+        """
+        Override init method to accept the optional search_string argument
+        """
+
+        self.regex_group = None
+        self.search_string = None
+        self.re_string = None
+
+        if "search_string" in kwargs:
+            self.search_string = kwargs.get("search_string")
+            del kwargs["search_string"]
+
+            if self.search_string:
+
+                try:
+                    self.re_string = re.compile(self.search_string)
+                except re.error as e:
+                    msg = f"{self.search_string} is not a valid regular expression"
+                    raise ValueError(msg) from e
+
+        super().__init__(*args, **kwargs)
+
+    def parse_abi(self, *args, **kwargs):
+        """Parse ABI and convert each /sys "What" entry into a regex"""
+        super().parse_abi(*args, **kwargs)
+
+        self.regex_group = {}
+
+        print("Converting ABI What fields into regexes...", file=sys.stderr)
+
+        for t in sorted(self.data.items(), key=lambda x: x[0]):
+            v = t[1]
+            if v.get("type") == "File":
+                continue
+
+            v["regex"] = []
+
+            for what in v.get("what", []):
+                if not what.startswith("/sys"):
+                    continue
+
+                new = what
+                for r, s in self.re_whats:
+                    try:
+                        new = r.sub(s, new)
+                    except re.error as e:
+                        # Help debugging troubles with new regexes
+                        raise re.error(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e
+
+                v["regex"].append(new)
+
+                if self.debug & AbiDebug.REGEX:
+                    self.log.debug("%-90s <== %s", new, what)
+
+                # Store regex into a subgroup to speedup searches
+                self.regex_append(what, new)
+
+        if self.debug & AbiDebug.SUBGROUP_DICT:
+            self.log.debug("%s", pformat(self.regex_group))
+
+        if self.debug & AbiDebug.SUBGROUP_SIZE:
+            biggestd_keys = sorted(self.regex_group.keys(),
+                                   key= lambda k: len(self.regex_group[k]),
+                                   reverse=True)
+
+            print("Top regex subgroups:", file=sys.stderr)
+            for k in biggestd_keys[:10]:
+                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)
diff --git a/tools/lib/python/abi/helpers.py b/tools/lib/python/abi/helpers.py
new file mode 100644
index 000000000000..639b23e4ca33
--- /dev/null
+++ b/tools/lib/python/abi/helpers.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# pylint: disable=R0903
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Helper classes for ABI parser
+"""
+
+ABI_DIR = "Documentation/ABI/"
+
+
+class AbiDebug:
+    """Debug levels, as bit flags that are tested with a bitwise AND"""
+
+    WHAT_PARSING = 1
+    WHAT_OPEN = 2
+    DUMP_ABI_STRUCTS = 4
+    UNDEFINED = 8
+    REGEX = 16
+    SUBGROUP_MAP = 32
+    SUBGROUP_DICT = 64
+    SUBGROUP_SIZE = 128
+    GRAPH = 256
+
+
+DEBUG_HELP = """
+1  - enable debug parsing logic
+2  - enable debug messages on file open
+4  - enable debug for ABI parse data
+8  - enable extra debug information to identify troubles
+     with ABI symbols found at the local machine that
+     weren't found on ABI documentation (used only for
+     undefined subcommand)
+16 - enable debug for what to regex conversion
+32 - enable debug for symbol regex subgroups
+64 - enable debug for the regex subgroups dictionary
+128 - enable debug for the size of the biggest regex subgroups
+256 - enable debug for sysfs graph tree variable
+"""
diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py
new file mode 100644
index 000000000000..4a2554da217b
--- /dev/null
+++ b/tools/lib/python/abi/system_symbols.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0912,R0914,R0915,R1702
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+import os
+import re
+import sys
+
+from concurrent import futures
+from datetime import datetime
+from random import shuffle
+
+from abi.helpers import AbiDebug
+
+class SystemSymbols:
+    """Stores arguments for the class and initialize class vars"""
+
+    def graph_add_file(self, path, link=None):
+        """
+        add a file path to the sysfs graph stored at self.root
+        """
+
+        if path in self.files:
+            return
+
+        name = ""
+        ref = self.root
+        for edge in path.split("/"):
+            name += edge + "/"
+            if edge not in ref:
+                ref[edge] = {"__name": [name.rstrip("/")]}
+
+            ref = ref[edge]
+
+        if link and link not in ref["__name"]:
+            ref["__name"].append(link.rstrip("/"))
+
+        self.files.add(path)
+
+    def print_graph(self, root_prefix="", root=None, level=0):
+        """Prints a reference tree graph using UTF-8 characters"""
+
+        if not root:
+            root = self.root
+            level = 0
+
+        # Prevent endless traverse
+        if level > 5:
+            return
+
+        if level > 0:
+            prefix = "├──"
+            last_prefix = "└──"
+        else:
+            prefix = ""
+            last_prefix = ""
+
+        items = list(root.items())
+
+        names = root.get("__name", [])
+        for k, edge in items:
+            if k == "__name":
+                continue
+
+            if not k:
+                k = "/"
+
+            if len(names) > 1:
+                k += " links: " + ",".join(names[1:])
+
+            if edge == items[-1][1]:
+                print(root_prefix + last_prefix + k)
+                p = root_prefix
+                if level > 0:
+                    p += "   "
+                self.print_graph(p, edge, level + 1)
+            else:
+                print(root_prefix + prefix + k)
+                p = root_prefix + "│   "
+                self.print_graph(p, edge, level + 1)
+
+    def _walk(self, root):
+        """
+        Walk through sysfs to get all devnodes that aren't ignored.
+
+        By default, uses /sys as sysfs mounting point. If another
+        directory is used, it is replaced by /sys in the stored paths.
+        """
+
+        with os.scandir(root) as obj:
+            for entry in obj:
+                path = os.path.join(root, entry.name)
+                if self.sysfs:
+                    p = path.replace(self.sysfs, "/sys", 1)  # count= kwarg is 3.13+
+                else:
+                    p = path
+                # Skip just this entry; its siblings must still be scanned
+                if self.re_ignore.search(p):
+                    continue
+
+                # Handle link first to avoid directory recursion
+                if entry.is_symlink():
+                    real = os.path.realpath(path)
+                    if not self.sysfs:
+                        self.aliases[path] = real
+                    else:
+                        real = real.replace(self.sysfs, "/sys", 1)
+
+                    # Add absfile location to graph if it doesn't exist
+                    if not self.re_ignore.search(real):
+                        # Add link to the graph
+                        self.graph_add_file(real, p)
+
+                elif entry.is_file():
+                    self.graph_add_file(p)
+
+                elif entry.is_dir():
+                    self._walk(path)
+
+    def __init__(self, abi, sysfs="/sys", hints=False):
+        """
+        Initialize internal variables and get a list of all files inside
+        sysfs that can currently be parsed.
+
+        Please notice that there are several entries on sysfs that aren't
+        documented as ABI. Ignore those.
+
+        The real paths will be stored under self.files. Aliases will be
+        stored separately, in self.aliases.
+        """
+
+        self.abi = abi
+        self.log = abi.log
+
+        if sysfs != "/sys":
+            self.sysfs = sysfs.rstrip("/")
+        else:
+            self.sysfs = None
+
+        self.hints = hints
+
+        self.root = {}
+        self.aliases = {}
+        self.files = set()
+        # _walk() matches these against paths already normalized to /sys
+        dont_walk = [
+            # Those require root access and aren't documented at ABI
+            "^/sys/kernel/debug",
+            "^/sys/kernel/tracing",
+            "^/sys/fs/pstore",
+            "^/sys/fs/bpf",
+            "^/sys/fs/fuse",
+
+            # This is not documented at ABI
+            "^/sys/module",
+
+            "^/sys/fs/cgroup",      # this is big and has zero docs under ABI
+            "^/sys/firmware",       # documented elsewhere: ACPI, DT bindings
+            "sections|notes",       # aren't actually part of ABI
+
+            # kernel-parameters.txt - not easy to parse
+            "parameters",
+        ]
+
+        self.re_ignore = re.compile("|".join(dont_walk))
+
+        print(f"Reading {sysfs} directory contents...", file=sys.stderr)
+        self._walk(sysfs)
+
+    def check_file(self, refs, found):
+        """Check if each sysfs file in refs matches an ABI regex"""
+        # Returns one {"found", "fname", "msg"} dict per entry in refs
+        res_list = []
+
+        try:
+            for names in refs:
+                fname = names[0]
+
+                res = {
+                    "found": False,
+                    "fname": fname,
+                    "msg": "",
+                }
+                res_list.append(res)
+
+                re_what = self.abi.get_regexes(fname)
+                if not re_what:
+                    self.abi.log.warning(f"missing rules for {fname}")
+                    continue
+
+                for name in names:
+                    for r in re_what:
+                        if self.abi.debug & AbiDebug.UNDEFINED:
+                            self.log.debug("check if %s matches '%s'", name, r.pattern)
+                        if r.match(name):
+                            res["found"] = True
+                            if found:
+                                res["msg"] += f"  {fname}: regex:\n\t"
+                            continue  # NOTE(review): no-op here; was "break" intended?
+
+                if self.hints and not res["found"]:
+                    res["msg"] += f"  {fname} not found. Tested regexes:\n"
+                    for r in re_what:
+                        res["msg"] += "    " + r.pattern + "\n"
+
+        except KeyboardInterrupt:
+            pass  # interrupted: fall through and return the partial results
+
+        return res_list
+
+    def _ref_interactor(self, root):
+        """Recursively iterate over the sysfs tree, yielding its node dicts"""
+
+        for k, v in root.items():
+            if isinstance(v, dict):
+                yield from self._ref_interactor(v)
+            # Top-level entries and the "__name" lists are not yielded
+            if root == self.root or k == "__name":
+                continue
+            # With a search regex set, yield only nodes whose first name matches
+            if self.abi.re_string:
+                fname = v["__name"][0]
+                if self.abi.re_string.search(fname):
+                    yield v
+            else:
+                yield v
+
+
+    def get_fileref(self, all_refs, chunk_size):
+        """Yield all_refs in chunk_size lists; the last may be short or empty"""
+
+        n = 0
+        refs = []
+
+        for ref in all_refs:
+            refs.append(ref)
+
+            n += 1
+            if n >= chunk_size:
+                yield refs
+                n = 0
+                refs = []
+
+        yield refs
+
+    def check_undefined_symbols(self, max_workers=None, chunk_size=50,
+                                found=None, dry_run=None):
+        """Search ABI for sysfs symbols missing documentation"""
+
+        self.abi.parse_abi()
+
+        if self.abi.debug & AbiDebug.GRAPH:
+            self.print_graph()
+
+        all_refs = []
+        for ref in self._ref_interactor(self.root):
+            all_refs.append(ref["__name"])
+
+        if dry_run:
+            print("Would check", file=sys.stderr)
+            for ref in all_refs:
+                print(", ".join(ref))
+
+            return
+
+        print("Starting to search symbols (it may take several minutes):",
+              file=sys.stderr)
+        start = datetime.now()
+        old_elapsed = None
+
+        # Python doesn't support multithreading due to limitations on its
+        # global lock (GIL). While Python 3.13 finally made GIL optional,
+        # there are still issues related to it. Also, we want to have
+        # backward compatibility with older versions of Python.
+        #
+        # So, use instead multiprocess. However, Python is very slow passing
+        # data from/to multiple processes. Also, it may consume lots of memory
+        # if the data to be shared is not small.  So, we need to group workload
+        # in chunks that are big enough to generate performance gains while
+        # not being so big that would cause out-of-memory.
+
+        num_refs = len(all_refs)
+        print(f"Number of references to parse: {num_refs}", file=sys.stderr)
+
+        if not max_workers:
+            max_workers = os.cpu_count()
+        elif max_workers > os.cpu_count():
+            max_workers = os.cpu_count()
+
+        max_workers = max(max_workers, 1)
+
+        max_chunk_size = int((num_refs + max_workers - 1) / max_workers)
+        chunk_size = min(chunk_size, max_chunk_size)
+        chunk_size = max(1, chunk_size)
+
+        if max_workers > 1:
+            executor = futures.ProcessPoolExecutor
+
+            # Place references in a random order. This may help improving
+            # performance, by mixing complex/simple expressions when creating
+            # chunks
+            shuffle(all_refs)
+        else:
+            # Python has a high overhead with processes. When there's just
+            # one worker, it is faster to not create a new process.
+            # Yet, User still deserves to have a progress print. So, use
+            # python's "thread", which is actually a single process, using
+            # an internal schedule to switch between tasks. No performance
+            # gains for non-IO tasks, but still it can be quickly interrupted
+            # from time to time to display progress.
+            executor = futures.ThreadPoolExecutor
+
+        not_found = []
+        f_list = []
+        with executor(max_workers=max_workers) as exe:
+            for refs in self.get_fileref(all_refs, chunk_size):
+                if refs:
+                    try:
+                        f_list.append(exe.submit(self.check_file, refs, found))
+
+                    except KeyboardInterrupt:
+                        return
+
+            total = len(f_list)
+
+            if not total:
+                if self.abi.re_string:
+                    print(f"No ABI symbol matches {self.abi.search_string}")
+                else:
+                    self.abi.log.warning("No ABI symbols found")
+                return
+
+            print(f"{len(f_list):6d} jobs queued on {max_workers} workers",
+                  file=sys.stderr)
+
+            while f_list:
+                try:
+                    t = futures.wait(f_list, timeout=1,
+                                     return_when=futures.FIRST_COMPLETED)
+
+                    done = t[0]
+
+                    for fut in done:
+                        res_list = fut.result()
+
+                        for res in res_list:
+                            if not res["found"]:
+                                not_found.append(res["fname"])
+                            if res["msg"]:
+                                print(res["msg"])
+
+                        f_list.remove(fut)
+                except KeyboardInterrupt:
+                    return
+
+                except RuntimeError as e:
+                    self.abi.log.warning(f"Future: {e}")
+                    break
+
+                if sys.stderr.isatty():
+                    elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
+                    if len(f_list) < total:
+                        elapsed += f" ({total - len(f_list)}/{total} jobs completed).  "
+                    if elapsed != old_elapsed:
+                        print(elapsed + "\r", end="", flush=True,
+                              file=sys.stderr)
+                        old_elapsed = elapsed
+
+        elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
+        print(elapsed, file=sys.stderr)
+
+        for f in sorted(not_found):
+            print(f"{f} not found.")
diff --git a/tools/lib/python/feat/parse_features.py b/tools/lib/python/feat/parse_features.py
new file mode 100755
index 000000000000..b88c04d3e2fe
--- /dev/null
+++ b/tools/lib/python/feat/parse_features.py
@@ -0,0 +1,494 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0911,R0912,R0914,R0915
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+
+"""
+Library to parse the Linux Feature files and produce a ReST book.
+"""
+
+import os
+import re
+import sys
+
+from glob import iglob
+
+
+class ParseFeature:
+    """
+    Parses Documentation/features, allowing to generate ReST documentation
+    from it.
+    """
+
+    h_name = "Feature"
+    h_kconfig = "Kconfig"
+    h_description = "Description"
+    h_subsys = "Subsystem"
+    h_status = "Status"
+    h_arch = "Architecture"
+
+    # Sort order for status. Others will be mapped at the end.
+    status_map = {
+        "ok":   0,
+        "TODO": 1,
+        "N/A":  2,
+        # The only missing status is "..", which was mapped as "---",
+        # as this is an special ReST cell value. Let it get the
+        # default order (99).
+    }
+
+    def __init__(self, prefix, debug=0, enable_fname=False):
+        """
+        Sets internal variables
+        """
+
+        self.prefix = prefix
+        self.debug = debug
+        self.enable_fname = enable_fname
+
+        self.data = {}
+
+        # Initial maximum values use just the headers
+        self.max_size_name = len(self.h_name)
+        self.max_size_kconfig = len(self.h_kconfig)
+        self.max_size_description = len(self.h_description)
+        self.max_size_desc_word = 0
+        self.max_size_subsys = len(self.h_subsys)
+        self.max_size_status = len(self.h_status)
+        self.max_size_arch = len(self.h_arch)
+        self.max_size_arch_with_header = self.max_size_arch + self.max_size_arch
+        self.description_size = 1
+
+        self.msg = ""
+
+    def emit(self, msg="", end="\n"):
+        self.msg += msg + end
+
+    def parse_error(self, fname, ln, msg, data=None):
+        """
+        Displays an error message, printing file name and line
+        """
+
+        if ln:
+            fname += f"#{ln}"
+
+        print(f"Warning: file {fname}: {msg}", file=sys.stderr, end="")
+
+        if data:
+            data = data.rstrip()
+            print(f":\n\t{data}", file=sys.stderr)
+        else:
+            print("", file=sys.stderr)
+
+    def parse_feat_file(self, fname):
+        """Parses a single arch-support.txt feature file into self.data"""
+
+        if os.path.isdir(fname):
+            return
+
+        base = os.path.basename(fname)
+
+        if base != "arch-support.txt":
+            if self.debug:
+                print(f"ignoring {fname}", file=sys.stderr)
+            return
+
+        subsys = os.path.dirname(fname).split("/")[-2]
+        self.max_size_subsys = max(self.max_size_subsys, len(subsys))
+
+        feature_name = ""
+        kconfig = ""
+        description = ""
+        comments = ""
+        arch_table = {}
+
+        if self.debug > 1:
+            print(f"Opening {fname}", file=sys.stderr)
+
+        if self.enable_fname:
+            full_fname = os.path.abspath(fname)
+            self.emit(f".. FILE {full_fname}")
+
+        with open(fname, encoding="utf-8") as f:
+            for ln, line in enumerate(f, start=1):
+                line = line.strip()
+
+                match = re.match(r"^\#\s+Feature\s+name:\s*(.*\S)", line)
+                if match:
+                    feature_name = match.group(1)
+
+                    self.max_size_name = max(self.max_size_name,
+                                             len(feature_name))
+                    continue
+
+                match = re.match(r"^\#\s+Kconfig:\s*(.*\S)", line)
+                if match:
+                    kconfig = match.group(1)
+
+                    self.max_size_kconfig = max(self.max_size_kconfig,
+                                                len(kconfig))
+                    continue
+
+                match = re.match(r"^\#\s+description:\s*(.*\S)", line)
+                if match:
+                    description = match.group(1)
+
+                    self.max_size_description = max(self.max_size_description,
+                                                    len(description))
+
+                    words = re.split(r"\s+", line)[1:]
+                    for word in words:
+                        self.max_size_desc_word = max(self.max_size_desc_word,
+                                                      len(word))
+
+                    continue
+
+                if re.search(r"^\s*$", line):  # blank line
+                    continue
+
+                if re.match(r"^\s*\-+\s*$", line):
+                    continue
+
+                if re.search(r"^\s*\|\s*arch\s*\|\s*status\s*\|\s*$", line):
+                    continue
+
+                match = re.match(r"^\#\s*(.*)$", line)
+                if match:
+                    comments += match.group(1)
+                    continue
+
+                match = re.match(r"^\s*\|\s*(\S+):\s*\|\s*(\S+)\s*\|\s*$", line)
+                if match:
+                    arch = match.group(1)
+                    status = match.group(2)
+
+                    self.max_size_status = max(self.max_size_status,
+                                               len(status))
+                    self.max_size_arch = max(self.max_size_arch, len(arch))
+
+                    if status == "..":
+                        status = "---"
+
+                    arch_table[arch] = status
+
+                    continue
+
+                self.parse_error(fname, ln, "Line is invalid", line)
+
+        if not feature_name:
+            self.parse_error(fname, 0, "Feature name not found")
+            return
+        if not subsys:
+            self.parse_error(fname, 0, "Subsystem not found")
+            return
+        if not kconfig:
+            self.parse_error(fname, 0, "Kconfig not found")
+            return
+        if not description:
+            self.parse_error(fname, 0, "Description not found")
+            return
+        if not arch_table:
+            self.parse_error(fname, 0, "Architecture table not found")
+            return
+
+        self.data[feature_name] = {
+            "where": fname,
+            "subsys": subsys,
+            "kconfig": kconfig,
+            "description": description,
+            "comments": comments,
+            "table": arch_table,
+        }
+
+        self.max_size_arch_with_header = self.max_size_arch + len(self.h_arch)
+
+    def parse(self):
+        """Parses all arch-support.txt feature files inside self.prefix"""
+
+        path = os.path.expanduser(self.prefix)
+
+        if self.debug > 2:
+            print(f"Running parser for {path}")
+
+        example_path = os.path.join(path, "arch-support.txt")
+
+        for fname in iglob(os.path.join(path, "**"), recursive=True):
+            if fname != example_path:
+                self.parse_feat_file(fname)
+
+        return self.data
+
+    def output_arch_table(self, arch, feat=None):
+        """
+        Output feature(s) for a given architecture.
+        """
+
+        title = f"Feature status on {arch} architecture"
+
+        self.emit("=" * len(title))
+        self.emit(title)
+        self.emit("=" * len(title))
+        self.emit()
+
+        self.emit("=" * self.max_size_subsys + "  ", end="")
+        self.emit("=" * self.max_size_name + "  ", end="")
+        self.emit("=" * self.max_size_kconfig + "  ", end="")
+        self.emit("=" * self.max_size_status + "  ", end="")
+        self.emit("=" * self.max_size_description)
+
+        self.emit(f"{self.h_subsys:<{self.max_size_subsys}}  ", end="")
+        self.emit(f"{self.h_name:<{self.max_size_name}}  ", end="")
+        self.emit(f"{self.h_kconfig:<{self.max_size_kconfig}}  ", end="")
+        self.emit(f"{self.h_status:<{self.max_size_status}}  ", end="")
+        self.emit(f"{self.h_description:<{self.max_size_description}}")
+
+        self.emit("=" * self.max_size_subsys + "  ", end="")
+        self.emit("=" * self.max_size_name + "  ", end="")
+        self.emit("=" * self.max_size_kconfig + "  ", end="")
+        self.emit("=" * self.max_size_status + "  ", end="")
+        self.emit("=" * self.max_size_description)
+
+        sorted_features = sorted(self.data.keys(),
+                                 key=lambda x: (self.data[x]["subsys"],
+                                                x.lower()))
+
+        for name in sorted_features:
+            if feat and name != feat:
+                continue
+
+            arch_table = self.data[name]["table"]
+
+            if not arch in arch_table:
+                continue
+
+            self.emit(f"{self.data[name]['subsys']:<{self.max_size_subsys}}  ",
+                  end="")
+            self.emit(f"{name:<{self.max_size_name}}  ", end="")
+            self.emit(f"{self.data[name]['kconfig']:<{self.max_size_kconfig}}  ",
+                  end="")
+            self.emit(f"{arch_table[arch]:<{self.max_size_status}}  ",
+                  end="")
+            self.emit(f"{self.data[name]['description']}")
+
+        self.emit("=" * self.max_size_subsys + "  ", end="")
+        self.emit("=" * self.max_size_name + "  ", end="")
+        self.emit("=" * self.max_size_kconfig + "  ", end="")
+        self.emit("=" * self.max_size_status + "  ", end="")
+        self.emit("=" * self.max_size_description)
+
+        return self.msg
+
+    def output_feature(self, feat):
+        """
+        Output a feature on all architectures
+        """
+
+        title = f"Feature {feat}"
+
+        self.emit("=" * len(title))
+        self.emit(title)
+        self.emit("=" * len(title))
+        self.emit()
+
+        if not feat in self.data:
+            return
+
+        if self.data[feat]["subsys"]:
+            self.emit(f":Subsystem: {self.data[feat]['subsys']}")
+        if self.data[feat]["kconfig"]:
+            self.emit(f":Kconfig: {self.data[feat]['kconfig']}")
+
+        desc = self.data[feat]["description"]
+        desc = desc[0].upper() + desc[1:]
+        desc = desc.rstrip(". \t")
+        self.emit(f"\n{desc}.\n")
+
+        com = self.data[feat]["comments"].strip()
+        if com:
+            self.emit("Comments")
+            self.emit("--------")
+            self.emit(f"\n{com}\n")
+
+        self.emit("=" * self.max_size_arch + "  ", end="")
+        self.emit("=" * self.max_size_status)
+
+        self.emit(f"{self.h_arch:<{self.max_size_arch}}  ", end="")
+        self.emit(f"{self.h_status:<{self.max_size_status}}")
+
+        self.emit("=" * self.max_size_arch + "  ", end="")
+        self.emit("=" * self.max_size_status)
+
+        arch_table = self.data[feat]["table"]
+        for arch in sorted(arch_table.keys()):
+            self.emit(f"{arch:<{self.max_size_arch}}  ", end="")
+            self.emit(f"{arch_table[arch]:<{self.max_size_status}}")
+
+        self.emit("=" * self.max_size_arch + "  ", end="")
+        self.emit("=" * self.max_size_status)
+
+        return self.msg
+
+    def matrix_lines(self, desc_size, max_size_status, header):
+        """
+        Helper function to split element tables at the output matrix
+        """
+
+        if header:
+            ln_marker = "="
+        else:
+            ln_marker = "-"
+
+        self.emit("+" + ln_marker * self.max_size_name + "+", end="")
+        self.emit(ln_marker * desc_size, end="")
+        self.emit("+" + ln_marker * max_size_status + "+")
+
+    def output_matrix(self):
+        """
+        Generates a set of tables, grouped by subsystem, containing
+        what's the feature state on each architecture.
+        """
+
+        title = "Feature status on all architectures"
+
+        self.emit("=" * len(title))
+        self.emit(title)
+        self.emit("=" * len(title))
+        self.emit()
+
+        desc_title = f"{self.h_kconfig} / {self.h_description}"
+
+        desc_size = self.max_size_kconfig + 4
+        if not self.description_size:
+            desc_size = max(self.max_size_description, desc_size)
+        else:
+            desc_size = max(self.description_size, desc_size)
+
+        desc_size = max(self.max_size_desc_word, desc_size, len(desc_title))
+
+        notcompat = "Not compatible"
+        self.max_size_status = max(self.max_size_status, len(notcompat))
+
+        min_status_size = self.max_size_status + self.max_size_arch + 4
+        max_size_status = max(min_status_size, self.max_size_status)
+
+        h_status_per_arch = "Status per architecture"
+        max_size_status = max(max_size_status, len(h_status_per_arch))
+
+        cur_subsys = None
+        for name in sorted(self.data.keys(),
+                           key=lambda x: (self.data[x]["subsys"], x.lower())):
+            if not cur_subsys or cur_subsys != self.data[name]["subsys"]:
+                if cur_subsys:
+                    self.emit()
+
+                cur_subsys = self.data[name]["subsys"]
+
+                title = f"Subsystem: {cur_subsys}"
+                self.emit(title)
+                self.emit("=" * len(title))
+                self.emit()
+
+                self.matrix_lines(desc_size, max_size_status, 0)
+
+                self.emit(f"|{self.h_name:<{self.max_size_name}}", end="")
+                self.emit(f"|{desc_title:<{desc_size}}", end="")
+                self.emit(f"|{h_status_per_arch:<{max_size_status}}|")
+
+                self.matrix_lines(desc_size, max_size_status, 1)
+
+            lines = []
+            descs = []
+            cur_status = ""
+            line = ""
+
+            arch_table = sorted(self.data[name]["table"].items(),
+                                key=lambda x: (self.status_map.get(x[1], 99),
+                                               x[0].lower()))
+
+            for arch, status in arch_table:
+                if status == "---":
+                    status = notcompat
+
+                if status != cur_status:
+                    if line != "":
+                        lines.append(line)
+                        line = ""
+                    line = f"- **{status}**: {arch}"
+                elif len(line) + len(arch) + 2 < max_size_status:
+                    line += f", {arch}"
+                else:
+                    lines.append(line)
+                    line = f"  {arch}"
+                cur_status = status
+
+            if line != "":
+                lines.append(line)
+
+            description = self.data[name]["description"]
+            while len(description) > desc_size:
+                desc_line = description[:desc_size]
+
+                last_space = desc_line.rfind(" ")
+                if last_space != -1:
+                    desc_line = desc_line[:last_space]
+                    descs.append(desc_line)
+                    description = description[last_space + 1:]
+                else:
+                    desc_line = desc_line[:-1]
+                    descs.append(desc_line + "\\")
+                    description = description[len(desc_line):]
+
+            if description:
+                descs.append(description)
+
+            while len(lines) < 2 + len(descs):
+                lines.append("")
+
+            for ln, line in enumerate(lines):
+                col = ["", ""]
+
+                if not ln:
+                    col[0] = name
+                    col[1] = f"``{self.data[name]['kconfig']}``"
+                else:
+                    if ln >= 2 and descs:
+                        col[1] = descs.pop(0)
+
+                self.emit(f"|{col[0]:<{self.max_size_name}}", end="")
+                self.emit(f"|{col[1]:<{desc_size}}", end="")
+                self.emit(f"|{line:<{max_size_status}}|")
+
+            self.matrix_lines(desc_size, max_size_status, 0)
+
+        return self.msg
+
+    def list_arch_features(self, arch, feat):
+        """
+        Print a matrix of kernel feature support for the chosen architecture.
+        """
+        self.emit("#")
+        self.emit(f"# Kernel feature support matrix of the '{arch}' architecture:")
+        self.emit("#")
+
+        # Sort by subsystem, then by feature name (case‑insensitive)
+        for name in sorted(self.data.keys(),
+                           key=lambda n: (self.data[n]["subsys"].lower(),
+                                          n.lower())):
+            if feat and name != feat:
+                continue
+
+            feature = self.data[name]
+            arch_table = feature["table"]
+            status = arch_table.get(arch, "")
+            status = " " * ((4 - len(status)) // 2) + status
+
+            self.emit(f"{feature['subsys']:>{self.max_size_subsys + 1}}/ ",
+                      end="")
+            self.emit(f"{name:<{self.max_size_name}}: ", end="")
+            self.emit(f"{status:<5}|   ", end="")
+            self.emit(f"{feature['kconfig']:>{self.max_size_kconfig}} ",
+                      end="")
+            self.emit(f"#  {feature['description']}")
+
+        return self.msg
diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py
new file mode 100755
index 000000000000..a24f30ef4fa8
--- /dev/null
+++ b/tools/lib/python/jobserver.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0+
+#
+# pylint: disable=C0103,C0209
+#
+#
+
+"""
+Interacts with the POSIX jobserver during the Kernel build time.
+
+A "normal" jobserver task, like the one initiated by a make subprocess would do:
+
+    - open read/write file descriptors to communicate with the job server;
+    - ask for one slot by calling:
+        claim = os.read(reader, 1)
+    - when the job finishes, call:
+        os.write(writer, b"+")  # os.write(writer, claim)
+
+Here, the goal is different: This script aims to get the remaining number
+of slots available, using all of them to run a command which handles tasks in
+parallel. To do that, it has a loop that ends only after there are no
+slots left. It then increments the number by one, in order to allow a
+call equivalent to make -j$((claim+1)), e.g. having a parent make creating
+$claim child to do the actual work.
+
+The end goal here is to keep the total number of build tasks under the
+limit established by the initial make -j$n_proc call.
+
+See:
+    https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver
+"""
+
+import errno
+import os
+import subprocess
+import sys
+
+class JobserverExec:
+    """
+    Claim all slots from make using POSIX Jobserver.
+
+    The main methods here are:
+    - open(): reserves all slots;
+    - close(): method returns all used slots back to make;
+    - run(): executes a command setting PARALLELISM=<available slots jobs + 1>
+    """
+
+    def __init__(self):
+        """Initialize internal vars"""
+        self.claim = 0
+        self.jobs = b""
+        self.reader = None
+        self.writer = None
+        self.is_open = False
+
+    def open(self):
+        """Reserve all available slots to be claimed later on"""
+
+        if self.is_open:
+            return
+
+        try:
+            # Fetch the make environment options.
+            flags = os.environ["MAKEFLAGS"]
+            # Look for "--jobserver=R,W"
+            # Note that GNU Make has used --jobserver-fds and --jobserver-auth
+            # so this handles all of them.
+            opts = [x for x in flags.split(" ") if x.startswith("--jobserver")]
+
+            # Parse out R,W file descriptor numbers and set them nonblocking.
+            # If the MAKEFLAGS variable contains multiple instances of the
+            # --jobserver-auth= option, the last one is relevant.
+            fds = opts[-1].split("=", 1)[1]
+
+            # Starting with GNU Make 4.4, named pipes are used for reader
+            # and writer.
+            # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134
+            _, _, path = fds.partition("fifo:")
+
+            if path:
+                self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
+                self.writer = os.open(path, os.O_WRONLY)
+            else:
+                self.reader, self.writer = [int(x) for x in fds.split(",", 1)]
+                # Open a private copy of reader to avoid setting nonblocking
+                # on an unexpecting process with the same reader fd.
+                self.reader = os.open("/proc/self/fd/%d" % (self.reader),
+                                      os.O_RDONLY | os.O_NONBLOCK)
+
+            # Read out as many jobserver slots as possible
+            while True:
+                try:
+                    slot = os.read(self.reader, 8)
+                    self.jobs += slot
+                except (OSError, IOError) as e:
+                    if e.errno == errno.EWOULDBLOCK:
+                        # Stop at the end of the jobserver queue.
+                        break
+                    # Something went wrong: give back the jobs exactly once.
+                    if self.jobs:
+                        os.write(self.writer, self.jobs)
+                        self.jobs = b""     # or close() would return them again
+                    raise
+
+            # Add one for our caller's own slot: we just sit blocked on our child.
+            self.claim = len(self.jobs) + 1
+
+        except (KeyError, IndexError, ValueError, OSError, IOError):
+            # Any missing environment strings or bad fds should result in just
+            # not being parallel.
+            self.claim = None
+
+        self.is_open = True
+
+    def close(self):
+        """Return all reserved slots to Jobserver"""
+
+        if not self.is_open:
+            return
+
+        # Return all the reserved slots, forgetting them so that a later
+        # open() starts counting from scratch.
+        if self.jobs:
+            os.write(self.writer, self.jobs)
+            self.jobs = b""
+        self.is_open = False
+
+    def __enter__(self):
+        self.open()
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        self.close()
+
+    def run(self, cmd, *args, **pwargs):
+        """
+        Run a command setting PARALLELISM env variable to the number of
+        available job slots (claim) + 1, e.g. it will reserve claim slots
+        to do the actual build work, plus one to monitor its children.
+        """
+        self.open()             # Ensure that self.claim is set
+
+        # We can only claim parallelism if there was a jobserver (i.e. a
+        # top-level "-jN" argument) and there were no other failures. Otherwise
+        # leave out the environment variable and let the child figure out what
+        # is best.
+        if self.claim:
+            os.environ["PARALLELISM"] = str(self.claim)
+
+        return subprocess.call(cmd, *args, **pwargs)
diff --git a/tools/lib/python/kdoc/__init__.py b/tools/lib/python/kdoc/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/lib/python/kdoc/__init__.py
diff --git a/tools/docs/lib/enrich_formatter.py b/tools/lib/python/kdoc/enrich_formatter.py
index bb171567a4ca..bb171567a4ca 100644
--- a/tools/docs/lib/enrich_formatter.py
+++ b/tools/lib/python/kdoc/enrich_formatter.py
diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
new file mode 100644
index 000000000000..bfe02baf1606
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=R0903,R0913,R0914,R0917
+
+"""
+Parse kernel-doc tags on multiple kernel source files.
+"""
+
+import argparse
+import logging
+import os
+import re
+
+from kdoc.kdoc_parser import KernelDoc
+from kdoc.kdoc_output import OutputFormat
+
+
+class GlobSourceFiles:
+    """
+    Walk C source code file names and directories via an iterator.
+    """
+
+    def __init__(self, srctree=None, valid_extensions=None):
+        """
+        Initialize valid extensions with a tuple.
+
+        If not defined, assume default C extensions (.c and .h)
+
+        It would be possible to use python's glob function, but it is
+        very slow, and it is not iterative. So, it would wait to read all
+        directories before actually doing something.
+
+        So, let's use our own implementation.
+        """
+
+        if not valid_extensions:
+            self.extensions = (".c", ".h")
+        else:
+            self.extensions = valid_extensions
+
+        self.srctree = srctree
+
+    def _parse_dir(self, dirname):
+        """Internal generator yielding matching files under dirname, recursively"""
+
+        with os.scandir(dirname) as obj:
+            for entry in obj:
+                name = os.path.join(dirname, entry.name)
+
+                if entry.is_dir(follow_symlinks=False):
+                    yield from self._parse_dir(name)
+
+                if not entry.is_file():
+                    continue
+
+                basename = os.path.basename(name)
+
+                if not basename.endswith(self.extensions):
+                    continue
+
+                yield name
+
+    def parse_files(self, file_list, file_not_found_cb):
+        """
+        Define an iterator to parse all source files from file_list,
+        handling directories if any
+        """
+
+        if not file_list:
+            return
+
+        for fname in file_list:
+            if self.srctree:
+                f = os.path.join(self.srctree, fname)
+            else:
+                f = fname
+
+            if os.path.isdir(f):
+                yield from self._parse_dir(f)
+            elif os.path.isfile(f):
+                yield f
+            elif file_not_found_cb:
+                file_not_found_cb(fname)
+
+
+class KernelFiles():
+    """
+    Parse kernel-doc tags on multiple kernel source files.
+
+    There are two types of parsers defined here:
+        - self.parse_file(): parses both kernel-doc markups and
+          EXPORT_SYMBOL* macros;
+        - self.process_export_file(): parses only EXPORT_SYMBOL* macros.
+    """
+
+    def warning(self, msg):
+        """Ancillary routine to output a warning and increment error count"""
+
+        self.config.log.warning(msg)
+        self.errors += 1
+
+    def error(self, msg):
+        """Ancillary routine to output an error and increment error count"""
+
+        self.config.log.error(msg)
+        self.errors += 1
+
+    def parse_file(self, fname):
+        """
+        Parse a single Kernel source.
+        """
+
+        # Prevent parsing the same file twice if results are cached
+        if fname in self.files:
+            return
+
+        doc = KernelDoc(self.config, fname)
+        export_table, entries = doc.parse_kdoc()
+
+        self.export_table[fname] = export_table
+
+        self.files.add(fname)
+        self.export_files.add(fname)      # parse_kdoc() already checks exports
+
+        self.results[fname] = entries
+
+    def process_export_file(self, fname):
+        """
+        Parses EXPORT_SYMBOL* macros from a single Kernel source file.
+        """
+
+        # Prevent parsing the same file twice if results are cached
+        if fname in self.export_files:
+            return
+
+        doc = KernelDoc(self.config, fname)
+        export_table = doc.parse_export()
+
+        if not export_table:
+            self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}")
+            export_table = set()
+
+        self.export_table[fname] = export_table
+        self.export_files.add(fname)
+
+    def file_not_found_cb(self, fname):
+        """
+        Callback to warn if a file was not found.
+        """
+
+        self.error(f"Cannot find file {fname}")
+
+    def __init__(self, verbose=False, out_style=None,
+                 werror=False, wreturn=False, wshort_desc=False,
+                 wcontents_before_sections=False,
+                 logger=None):
+        """
+        Initialize startup variables; environment may enable verbose/werror
+        """
+
+        if not verbose:
+            verbose = os.environ.get("KBUILD_VERBOSE", "") not in ("", "0")
+
+        if out_style is None:
+            out_style = OutputFormat()
+
+        if not werror:
+            kcflags = os.environ.get("KCFLAGS", None)
+            if kcflags:
+                match = re.search(r"(\s|^)-Werror(\s|$)", kcflags)
+                if match:
+                    werror = True
+
+            # reading this variable is for backwards compat just in case
+            # someone was calling it with the variable from outside the
+            # kernel's build system
+            kdoc_werror = os.environ.get("KDOC_WERROR", None)
+            if kdoc_werror:
+                werror = kdoc_werror != "0"     # "0" explicitly disables it
+
+        # Some variables are global to the parser logic as a whole as they are
+        # used to send control configuration to KernelDoc class. As such,
+        # those variables are read-only inside the KernelDoc.
+        self.config = argparse.Namespace()      # per-object, not the class
+
+        self.config.verbose = verbose
+        self.config.werror = werror
+        self.config.wreturn = wreturn
+        self.config.wshort_desc = wshort_desc
+        self.config.wcontents_before_sections = wcontents_before_sections
+
+        if not logger:
+            self.config.log = logging.getLogger("kernel-doc")
+        else:
+            self.config.log = logger
+
+        self.config.warning = self.warning
+
+        self.config.src_tree = os.environ.get("SRCTREE", None)
+
+        # Initialize variables that are internal to KernelFiles
+
+        self.out_style = out_style
+
+        self.errors = 0
+        self.results = {}
+
+        self.files = set()
+        self.export_files = set()
+        self.export_table = {}
+
+    def parse(self, file_list, export_file=None):
+        """
+        Parse all files
+        """
+
+        glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+        for fname in glob.parse_files(file_list, self.file_not_found_cb):
+            self.parse_file(fname)
+
+        for fname in glob.parse_files(export_file, self.file_not_found_cb):
+            self.process_export_file(fname)
+
+    def out_msg(self, fname, name, arg):
+        """
+        Return output messages from a file name using the output style
+        filtering.
+
+        If output type was not handled by the styler, return None.
+        """
+
+        # NOTE: we can add rules here to filter out unwanted parts,
+        # although OutputFormat.msg already does that.
+
+        return self.out_style.msg(fname, name, arg)
+
+    def msg(self, enable_lineno=False, export=False, internal=False,
+            symbol=None, nosymbol=None, no_doc_sections=False,
+            filenames=None, export_file=None):
+        """
+        Iterates over the kernel-doc results and output messages,
+        yielding (fname, kernel-doc markups) on each iteration
+        """
+
+        self.out_style.set_config(self.config)
+
+        if not filenames:
+            filenames = sorted(self.results.keys())
+
+        glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+        for fname in filenames:
+            function_table = set()
+
+            if internal or export:
+                # Default to the file itself; use a local so that the
+                # choice doesn't leak into the next file's iteration
+                exports = export_file or [fname]
+                for f in glob.parse_files(exports, self.file_not_found_cb):
+                    function_table |= self.export_table[f]
+
+            if symbol:
+                for s in symbol:
+                    function_table.add(s)
+
+            self.out_style.set_filter(export, internal, symbol, nosymbol,
+                                      function_table, enable_lineno,
+                                      no_doc_sections)
+
+            msg = ""
+            if fname not in self.results:
+                self.config.log.warning("No kernel-doc for file %s", fname)
+                continue
+
+            symbols = self.results[fname]
+            self.out_style.set_symbols(symbols)
+
+            for arg in symbols:
+                m = self.out_msg(fname, arg.name, arg)
+
+                if m is None:
+                    ln = arg.get("ln", 0)
+                    dtype = arg.type    # 'type' is an attribute, not a get() key
+
+                    self.config.log.warning("%s:%d Can't handle %s",
+                                            fname, ln, dtype)
+                else:
+                    msg += m
+
+            if msg:
+                yield fname, msg
diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
new file mode 100644
index 000000000000..19805301cb2c
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# A class that will, eventually, encapsulate all of the parsed data that we
+# then pass into the output modules.
+#
+
+class KdocItem:
+    def __init__(self, name, fname, type, start_line, **other_stuff):
+        self.name = name
+        self.fname = fname
+        self.type = type
+        self.declaration_start_line = start_line
+        self.sections = {}
+        self.section_start_lines = self.sections_start_lines = {}  # 2nd: old typo
+        self.parameterlist = []
+        self.parameterdesc_start_lines = {}     # read with .get() by the output
+        self.parameterdescs = {}
+        self.parametertypes = {}
+        #
+        # Just save everything else into our own dict so that the output
+        # side can grab it directly as before.  As we move things into more
+        # structured data, this will, hopefully, fade away.
+        #
+        self.other_stuff = other_stuff
+
+    def get(self, key, default = None):
+        return self.other_stuff.get(key, default)
+
+    def __getitem__(self, key):
+        return self.get(key)
+
+    #
+    # Tracking of section and parameter information.
+    #
+    def set_sections(self, sections, start_lines):
+        self.sections = sections
+        self.section_start_lines = start_lines
+
+    def set_params(self, names, descs, types, starts):
+        self.parameterlist = names
+        self.parameterdescs = descs
+        self.parametertypes = types
+        self.parameterdesc_start_lines = starts
diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
new file mode 100644
index 000000000000..b1aaa7fc3604
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -0,0 +1,824 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917
+
+"""
+Implement output filters to print kernel-doc documentation.
+
+The implementation uses a virtual base class (OutputFormat) which
+contains dispatches to virtual methods, and some code to filter
+out output messages.
+
+The actual implementation is done on one separate class per each type
+of output. Currently, there are output classes for ReST and man/troff.
+"""
+
+import os
+import re
+from datetime import datetime
+
+from kdoc.kdoc_parser import KernelDoc, type_param
+from kdoc.kdoc_re import KernRe
+
+
+function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False)
+
+# match expressions used to find embedded type information
+type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False)
+type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False)
+type_func = KernRe(r"(\w+)\(\)", cache=False)
+type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
+
+# Special RST handling for func ptr params
+type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False)
+
+# Special RST handling for structs with func ptr params
+type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False)
+
+type_env = KernRe(r"(\$\w+)", cache=False)
+type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False)
+type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False)
+type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False)
+type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False)
+type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False)
+type_fallback = KernRe(r"\&([_\w]+)", cache=False)
+type_member_func = type_member + KernRe(r"\(\)", cache=False)
+
+
+class OutputFormat:
+    """
+    Base class for OutputFormat. If used as-is, it means that only
+    warnings will be displayed.
+    """
+
+    # output mode.
+    OUTPUT_ALL          = 0 # output all symbols and doc sections
+    OUTPUT_INCLUDE      = 1 # output only specified symbols
+    OUTPUT_EXPORTED     = 2 # output exported symbols
+    OUTPUT_INTERNAL     = 3 # output non-exported symbols
+
+    # Virtual member to be overridden at the inherited classes
+    highlights = []
+
+    def __init__(self):
+        """Declare internal vars and set mode to OUTPUT_ALL"""
+
+        self.out_mode = self.OUTPUT_ALL
+        self.enable_lineno = None
+        self.nosymbol = {}
+        self.symbol = None
+        self.function_table = None
+        self.config = None
+        self.no_doc_sections = False
+
+        self.data = ""
+
+    def set_config(self, config):
+        """
+        Setup global config variables used by both parser and output.
+        """
+
+        self.config = config
+
+    def set_filter(self, export, internal, symbol, nosymbol, function_table,
+                   enable_lineno, no_doc_sections):
+        """
+        Initialize filter variables according to the requested mode.
+
+        If more than one is set, symbol wins over export, then internal.
+
+        The nosymbol filter can be used on all modes.
+        """
+
+        self.enable_lineno = enable_lineno
+        self.no_doc_sections = no_doc_sections
+        self.function_table = function_table
+
+        if symbol:
+            self.out_mode = self.OUTPUT_INCLUDE
+        elif export:
+            self.out_mode = self.OUTPUT_EXPORTED
+        elif internal:
+            self.out_mode = self.OUTPUT_INTERNAL
+        else:
+            self.out_mode = self.OUTPUT_ALL
+
+        if nosymbol:
+            self.nosymbol = set(nosymbol)
+
+
+    def highlight_block(self, block):
+        """
+        Apply each (regex, replacement) pair from self.highlights to a block.
+        """
+
+        for r, sub in self.highlights:
+            block = r.sub(sub, block)
+
+        return block
+
+    def out_warnings(self, args):
+        """
+        Output warnings for identifiers that will be displayed.
+        """
+
+        for log_msg in args.warnings:
+            self.config.warning(log_msg)
+
+    def check_doc(self, name, args):
+        """Check if DOC should be output"""
+
+        if self.no_doc_sections:
+            return False
+
+        if name in self.nosymbol:
+            return False
+
+        if self.out_mode == self.OUTPUT_ALL:
+            self.out_warnings(args)
+            return True
+
+        if self.out_mode == self.OUTPUT_INCLUDE:
+            if name in self.function_table:
+                self.out_warnings(args)
+                return True
+
+        return False
+
+    def check_declaration(self, dtype, name, args):
+        """
+        Checks if a declaration should be output or not based on the
+        filtering criteria.
+        """
+
+        if name in self.nosymbol:
+            return False
+
+        if self.out_mode == self.OUTPUT_ALL:
+            self.out_warnings(args)
+            return True
+
+        if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]:
+            if name in self.function_table:
+                return True
+
+        if self.out_mode == self.OUTPUT_INTERNAL:
+            if dtype != "function":
+                self.out_warnings(args)
+                return True
+
+            if name not in self.function_table:
+                self.out_warnings(args)
+                return True
+
+        return False
+
+    def msg(self, fname, name, args):
+        """
+        Handles a single parser entry; returns its text, or None if unknown
+        """
+
+        self.data = ""
+
+        dtype = args.type
+
+        if dtype == "doc":
+            self.out_doc(fname, name, args)
+            return self.data
+
+        if not self.check_declaration(dtype, name, args):
+            return self.data
+
+        if dtype == "function":
+            self.out_function(fname, name, args)
+            return self.data
+
+        if dtype == "enum":
+            self.out_enum(fname, name, args)
+            return self.data
+
+        if dtype == "typedef":
+            self.out_typedef(fname, name, args)
+            return self.data
+
+        if dtype in ["struct", "union"]:
+            self.out_struct(fname, name, args)
+            return self.data
+
+        # Warn if some type requires an output logic
+        self.config.log.warning("doesn't know how to output '%s' block",
+                                dtype)
+
+        return None
+
+    # Virtual methods to be overridden by inherited classes
+    # At the base class, those do nothing.
+    def set_symbols(self, symbols):
+        """Receive the list of all symbols about to be output"""
+
+    def out_doc(self, fname, name, args):
+        """Outputs a DOC block"""
+
+    def out_function(self, fname, name, args):
+        """Outputs a function"""
+
+    def out_enum(self, fname, name, args):
+        """Outputs an enum"""
+
+    def out_typedef(self, fname, name, args):
+        """Outputs a typedef"""
+
+    def out_struct(self, fname, name, args):
+        """Outputs a struct"""
+
+
+class RestFormat(OutputFormat):
+    """Consts and functions used by ReST output"""
+
+    highlights = [
+        (type_constant, r"``\1``"),
+        (type_constant2, r"``\1``"),
+
+        # Note: need to escape () to avoid func matching later
+        (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"),
+        (type_member, r":c:type:`\1\2\3 <\1>`"),
+        (type_fp_param, r"**\1\\(\\)**"),
+        (type_fp_param2, r"**\1\\(\\)**"),
+        (type_func, r"\1()"),
+        (type_enum, r":c:type:`\1 <\2>`"),
+        (type_struct, r":c:type:`\1 <\2>`"),
+        (type_typedef, r":c:type:`\1 <\2>`"),
+        (type_union, r":c:type:`\1 <\2>`"),
+
+        # in rst this can refer to any type
+        (type_fallback, r":c:type:`\1`"),
+        (type_param_ref, r"**\1\2**")
+    ]
+    blankline = "\n"
+
+    sphinx_literal = KernRe(r'^[^.].*::$', cache=False)
+    sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False)
+
+    def __init__(self):
+        """
+        Creates instance variables.
+
+        Not really mandatory, but it is a good coding style and makes
+        pylint happy.
+        """
+
+        super().__init__()
+        self.lineprefix = ""
+
+    def print_lineno(self, ln):
+        """Append a '.. LINENO' marker for line ln + 1, if line numbers are on"""
+
+        if not self.enable_lineno or ln is None:
+            return
+        self.data += f".. LINENO {ln + 1}\n"
+
+    def output_highlight(self, args):
+        """
+        Append text to self.data, one lineprefix-ed line at a time, applying
+        self.highlights to everything but the literal blocks found in it
+        """
+
+        input_text = args
+        output = ""
+        in_literal = False
+        litprefix = ""
+        block = ""
+
+        for line in input_text.strip("\n").split("\n"):
+
+            # If we're in a literal block, see if we should drop out of it.
+            # Otherwise, pass the line straight through unmunged.
+            if in_literal:
+                if line.strip():  # If the line is not blank
+                    # If this is the first non-blank line in a literal block,
+                    # figure out the proper indent.
+                    if not litprefix:
+                        r = KernRe(r'^(\s*)')
+                        if r.match(line):
+                            litprefix = '^' + r.group(1)
+                        else:
+                            litprefix = ""
+
+                        output += line + "\n"
+                    elif not KernRe(litprefix).match(line):
+                        in_literal = False
+                    else:
+                        output += line + "\n"
+                else:
+                    output += line + "\n"
+
+            # Not in a literal block (or just dropped out)
+            if not in_literal:
+                block += line + "\n"
+                if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line):
+                    in_literal = True
+                    litprefix = ""
+                    output += self.highlight_block(block)
+                    block = ""
+
+        # Handle any remaining block
+        if block:
+            output += self.highlight_block(block)
+
+        # Print the output with the line prefix
+        for line in output.strip("\n").split("\n"):
+            self.data += self.lineprefix + line + "\n"
+
+    def out_section(self, args, out_docblock=False):
+        """
+        Outputs every section of args, except the ones listed in nosymbol.
+
+        This could use some work; it's used to output the DOC: sections, and
+        starts by putting out the name of the doc section itself, but that
+        tends to duplicate a header already in the template file.
+        """
+        for section, text in args.sections.items():
+            # Skip sections that are in the nosymbol_table
+            if section in self.nosymbol:
+                continue
+
+            # DOC blocks get an anchor too, and no title at all in INCLUDE mode
+            show_title = not out_docblock or self.out_mode != self.OUTPUT_INCLUDE
+            if out_docblock and show_title:
+                self.data += f".. _{section}:\n\n"
+            if show_title:
+                self.data += f'{self.lineprefix}**{section}**\n\n'
+
+            self.print_lineno(args.section_start_lines.get(section, 0))
+            self.output_highlight(text)
+            self.data += "\n"
+        self.data += "\n"
+
+    def out_doc(self, fname, name, args):
+        """Outputs a DOC block, when check_doc() lets it pass the filters"""
+        if self.check_doc(name, args):
+            self.out_section(args, out_docblock=True)
+
+    def out_function(self, fname, name, args):
+        """Outputs a function, function-like macro or function typedef as ReST"""
+        oldprefix = self.lineprefix
+        signature = ""
+
+        func_macro = args.get('func_macro', False)
+        if func_macro:
+            signature = name
+        else:
+            if args.get('functiontype'):
+                signature = args['functiontype'] + " "
+            signature += name + " ("
+
+        ln = args.declaration_start_line
+        count = 0
+        for parameter in args.parameterlist:
+            if count != 0:
+                signature += ", "
+            count += 1
+            dtype = args.parametertypes.get(parameter, "")
+
+            if function_pointer.search(dtype):
+                signature += f"{function_pointer.group(1)}{parameter}) ({function_pointer.group(2)})"
+            else:
+                signature += dtype
+
+        if not func_macro:
+            signature += ")"
+
+        self.print_lineno(ln)
+        if args.get('typedef') or not args.get('functiontype'):
+            self.data += f".. c:macro:: {name}\n\n"
+
+            if args.get('typedef'):
+                self.data += "   **Typedef**: "
+                self.lineprefix = ""
+                self.output_highlight(args.get('purpose', ""))
+                self.data += "\n\n**Syntax**\n\n"
+                self.data += f"  ``{signature}``\n\n"
+            else:
+                self.data += f"``{signature}``\n\n"
+        else:
+            self.data += f".. c:function:: {signature}\n\n"
+
+        if not args.get('typedef'):
+            self.print_lineno(ln)
+            self.lineprefix = "   "
+            self.output_highlight(args.get('purpose', ""))
+            self.data += "\n"
+
+        # Put descriptive text into a container (HTML <div>) to help set
+        # function prototypes apart
+        self.lineprefix = "  "
+
+        if args.parameterlist:
+            self.data += ".. container:: kernelindent\n\n"
+            self.data += f"{self.lineprefix}**Parameters**\n\n"
+
+        for parameter in args.parameterlist:
+            parameter_name = KernRe(r'\[.*').sub('', parameter)
+            dtype = args.parametertypes.get(parameter, "")
+
+            if dtype:
+                self.data += f"{self.lineprefix}``{dtype}``\n"
+            else:
+                self.data += f"{self.lineprefix}``{parameter}``\n"
+
+            self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))
+
+            self.lineprefix = "    "
+            if parameter_name in args.parameterdescs and \
+               args.parameterdescs[parameter_name] != KernelDoc.undescribed:
+
+                self.output_highlight(args.parameterdescs[parameter_name])
+                self.data += "\n"
+            else:
+                self.data += f"{self.lineprefix}*undescribed*\n\n"
+            self.lineprefix = "  "
+
+        self.out_section(args)
+        self.lineprefix = oldprefix
+
+    def out_enum(self, fname, name, args):
+        """Outputs an enum and its constants; a missing description is undescribed"""
+        oldprefix = self.lineprefix
+        ln = args.declaration_start_line
+
+        self.data += f"\n\n.. c:enum:: {name}\n\n"
+
+        self.print_lineno(ln)
+        self.lineprefix = "  "
+        self.output_highlight(args.get('purpose', ''))
+        self.data += "\n"
+
+        self.data += ".. container:: kernelindent\n\n"
+        outer = self.lineprefix + "  "
+        self.lineprefix = outer + "  "
+        self.data += f"{outer}**Constants**\n\n"
+
+        for parameter in args.parameterlist:
+            self.data += f"{outer}``{parameter}``\n"
+
+            if args.parameterdescs.get(parameter, KernelDoc.undescribed) != KernelDoc.undescribed:
+                self.output_highlight(args.parameterdescs[parameter])
+            else:
+                self.data += f"{self.lineprefix}*undescribed*\n\n"
+            self.data += "\n"
+
+        self.lineprefix = oldprefix
+        self.out_section(args)
+
+    def out_typedef(self, fname, name, args):
+        """Emit a ``c:type`` directive for a typedef, then its sections."""
+        saved_prefix = self.lineprefix
+        start_line = args.declaration_start_line
+
+        self.data += f"\n\n.. c:type:: {name}\n\n"
+        self.print_lineno(start_line)
+
+        # The purpose text is written with a three-space line prefix
+        self.lineprefix = "   "
+        self.output_highlight(args.get('purpose', ''))
+        self.data += "\n"
+
+        # Sections use whatever prefix was active when we were called
+        self.lineprefix = saved_prefix
+        self.out_section(args)
+
+    def out_struct(self, fname, name, args):
+        """Emit a struct/union directive with its definition and members."""
+
+        kind = args.type
+
+        self.data += f"\n\n.. c:{kind}:: {name}\n\n"
+        self.print_lineno(args.declaration_start_line)
+
+        # The purpose text sits one indentation level below the directive
+        saved_prefix = self.lineprefix
+        self.lineprefix += "  "
+        self.output_highlight(args.get('purpose', ""))
+        self.data += "\n"
+
+        self.data += ".. container:: kernelindent\n\n"
+        self.data += f"{self.lineprefix}**Definition**::\n\n"
+
+        # The definition is indented once more; tabs inside the
+        # declaration text are replaced by that same prefix.
+        self.lineprefix += "  "
+        body = args.get('definition', "").replace("\t", self.lineprefix)
+
+        self.data += f"{self.lineprefix}{kind} {name}" + ' {' + "\n"
+        self.data += f"{body}{self.lineprefix}" + "};\n\n"
+
+        self.lineprefix = "  "
+        self.data += f"{self.lineprefix}**Members**\n\n"
+
+        for parameter in args.parameterlist:
+            # Empty entries and preprocessor lines are not members
+            if not parameter or parameter.startswith("#"):
+                continue
+
+            # Descriptions are keyed by the name without any "[...]" suffix
+            member = parameter.split("[", maxsplit=1)[0]
+            if args.parameterdescs.get(member) == KernelDoc.undescribed:
+                continue
+
+            self.print_lineno(args.parameterdesc_start_lines.get(member, 0))
+            self.data += f"{self.lineprefix}``{parameter}``\n"
+
+            # The description is indented deeper than the member name
+            self.lineprefix = "    "
+            self.output_highlight(args.parameterdescs[member])
+            self.lineprefix = "  "
+
+            self.data += "\n"
+
+        self.data += "\n"
+
+        self.lineprefix = saved_prefix
+        self.out_section(args)
+
+
+class ManFormat(OutputFormat):
+    """Consts and functions used by man pages output"""
+
+    highlights = (
+        (type_constant, r"\1"),
+        (type_constant2, r"\1"),
+        (type_func, r"\\fB\1\\fP"),
+        (type_enum, r"\\fI\1\\fP"),
+        (type_struct, r"\\fI\1\\fP"),
+        (type_typedef, r"\\fI\1\\fP"),
+        (type_union, r"\\fI\1\\fP"),
+        (type_param, r"\\fI\1\\fP"),
+        (type_param_ref, r"\\fI\1\2\\fP"),
+        (type_member, r"\\fI\1\2\3\\fP"),
+        (type_fallback, r"\\fI\1\\fP")
+    )
+    blankline = ""
+
+    date_formats = [
+        "%a %b %d %H:%M:%S %Z %Y",
+        "%a %b %d %H:%M:%S %Y",
+        "%Y-%m-%d",
+        "%b %d %Y",
+        "%B %d %Y",
+        "%m %d %Y",
+    ]
+
+    def __init__(self, modulename):
+        """
+        Store the module name and compute the date shown on man pages.
+
+        KBUILD_BUILD_TIMESTAMP is used when it parses with one of
+        date_formats; otherwise the current time is taken.
+        """
+
+        super().__init__()
+        self.modulename = modulename
+        self.symbols = []
+
+        dt = None
+        tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP")
+        if tstamp:
+            for fmt in self.date_formats:
+                try:
+                    dt = datetime.strptime(tstamp, fmt)
+                    break
+                except ValueError:
+                    pass
+
+        if not dt:
+            dt = datetime.now()
+
+        self.man_date = dt.strftime("%B %Y")
+
+    def arg_name(self, args, name):
+        """
+        Return the name that will be used for the man page.
+
+        As we may have the same name on different namespaces,
+        prepend the data type for all types except functions and typedefs.
+
+        The doc section is special: it uses the modulename.
+        """
+
+        dtype = args.type
+
+        if dtype == "doc":
+            return self.modulename
+
+        if dtype in ["function", "typedef"]:
+            return name
+
+        return f"{dtype} {name}"
+
+    def set_symbols(self, symbols):
+        """
+        Get a list of all symbols from kernel_doc.
+
+        Man pages will use it to add a SEE ALSO section with other
+        symbols at the same file.
+        """
+        self.symbols = symbols
+
+    def out_tail(self, fname, name, args):
+        """Adds a tail for all man pages"""
+
+        # SEE ALSO section
+        self.data += '.SH "SEE ALSO"' + "\n.PP\n"
+        self.data += (f"Kernel file \\fB{args.fname}\\fR\n")
+        if len(self.symbols) >= 2:
+            cur_name = self.arg_name(args, name)
+
+            related = []
+            for arg in self.symbols:
+                out_name = self.arg_name(arg, arg.name)
+
+                if cur_name == out_name:
+                    continue
+
+                related.append(f"\\fB{out_name}\\fR(9)")
+
+            self.data += ",\n".join(related) + "\n"
+
+        # TODO: does it make sense to add other sections? Maybe
+        # REPORTING ISSUES? LICENSE?
+
+    def msg(self, fname, name, args):
+        """
+        Handles a single entry from kernel-doc parser.
+
+        Add a tail at the end of man pages output.
+        """
+        super().msg(fname, name, args)
+        self.out_tail(fname, name, args)
+
+        return self.data
+
+    def output_highlight(self, block):
+        """
+        Outputs a C symbol that may require being highlighted with
+        self.highlights variable using troff syntax
+        """
+
+        contents = self.highlight_block(block)
+
+        if isinstance(contents, list):
+            contents = "\n".join(contents)
+
+        for line in contents.strip("\n").split("\n"):
+            line = KernRe(r"^\s*").sub("", line)
+            if not line:
+                continue
+
+            if line[0] == ".":
+                self.data += "\\&" + line + "\n"
+            else:
+                self.data += line + "\n"
+
+    def out_doc(self, fname, name, args):
+        """Output a doc section: a .TH header plus one .SH per section"""
+        if not self.check_doc(name, args):
+            return
+
+        out_name = self.arg_name(args, name)
+        self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section}"' + "\n"
+            self.output_highlight(text)
+
+    def out_function(self, fname, name, args):
+        """Output a function: NAME, SYNOPSIS, ARGUMENTS and its sections"""
+
+        out_name = self.arg_name(args, name)
+
+        self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n"
+
+        self.data += ".SH NAME\n"
+        self.data += f"{name} \\- {args['purpose']}\n"
+
+        self.data += ".SH SYNOPSIS\n"
+        if args.get('functiontype', ''):
+            self.data += f'.B "{args["functiontype"]}" {name}' + "\n"
+        else:
+            self.data += f'.B "{name}' + "\n"
+
+        count = 0
+        parenth = "("
+        post = ","
+
+        for parameter in args.parameterlist:
+            if count == len(args.parameterlist) - 1:
+                post = ");"
+
+            dtype = args.parametertypes.get(parameter, "")
+            if function_pointer.match(dtype):
+                # Pointer-to-function; ".BI" must start the line to act as a macro
+                self.data += f'.BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n"
+            else:
+                dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype)
+
+                self.data += f'.BI "{parenth}{dtype}"  "{post}"' + "\n"
+            count += 1
+            parenth = ""
+
+        if args.parameterlist:
+            self.data += ".SH ARGUMENTS\n"
+
+        for parameter in args.parameterlist:
+            parameter_name = re.sub(r'\[.*', '', parameter)
+
+            self.data += f'.IP "{parameter}" 12' + "\n"
+            self.output_highlight(args.parameterdescs.get(parameter_name, ""))
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section.upper()}"' + "\n"
+            self.output_highlight(text)
+
+    def out_enum(self, fname, name, args):
+        """Output an enum: NAME, SYNOPSIS, Constants and its sections"""
+        out_name = self.arg_name(args, name)
+        self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+        self.data += ".SH NAME\n"
+        self.data += f"enum {name} \\- {args['purpose']}\n"
+
+        self.data += ".SH SYNOPSIS\n"
+        self.data += f"enum {name}" + " {\n"
+
+        count = 0
+        for parameter in args.parameterlist:
+            self.data += f'.br\n.BI "    {parameter}"' + "\n"
+            if count == len(args.parameterlist) - 1:
+                self.data += "\n};\n"
+            else:
+                self.data += ", \n.br\n"
+
+            count += 1
+
+        self.data += ".SH Constants\n"
+
+        for parameter in args.parameterlist:
+            parameter_name = KernRe(r'\[.*').sub('', parameter)
+            self.data += f'.IP "{parameter}" 12' + "\n"
+            self.output_highlight(args.parameterdescs.get(parameter_name, ""))
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section}"' + "\n"
+            self.output_highlight(text)
+
+    def out_typedef(self, fname, name, args):
+        """Output a typedef: a NAME line followed by its sections"""
+        module = self.modulename
+        purpose = args.get('purpose')
+        out_name = self.arg_name(args, name)
+
+        self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+        self.data += ".SH NAME\n"
+        self.data += f"typedef {name} \\- {purpose}\n"
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section}"' + "\n"
+            self.output_highlight(text)
+
+    def out_struct(self, fname, name, args):
+        """Output a struct/union: NAME, SYNOPSIS, Members and its sections"""
+        module = self.modulename
+        purpose = args.get('purpose')
+        definition = args.get('definition')
+        out_name = self.arg_name(args, name)
+        self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+        self.data += ".SH NAME\n"
+        self.data += f"{args.type} {name} \\- {purpose}\n"
+
+        # Replace tabs with two spaces and handle newlines
+        declaration = definition.replace("\t", "  ")
+        declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration)
+
+        self.data += ".SH SYNOPSIS\n"
+        self.data += f"{args.type} {name} " + "{" + "\n.br\n"
+        self.data += f'.BI "{declaration}\n' + "};\n.br\n\n"
+
+        self.data += ".SH Members\n"
+        for parameter in args.parameterlist:
+            if parameter.startswith("#"):
+                continue
+
+            parameter_name = re.sub(r"\[.*", "", parameter)
+
+            if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
+                continue
+
+            self.data += f'.IP "{parameter}" 12' + "\n"
+            self.output_highlight(args.parameterdescs.get(parameter_name))
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section}"' + "\n"
+            self.output_highlight(text)
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
new file mode 100644
index 000000000000..500aafc50032
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -0,0 +1,1670 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
+
+"""
+kdoc_parser
+===========
+
+Read a C language source or header FILE and extract embedded
+documentation comments
+"""
+
+import sys
+import re
+from pprint import pformat
+
+from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.kdoc_item import KdocItem
+
+#
+# Regular expressions used to parse kernel-doc markups at KernelDoc class.
+#
+# Let's declare them in lowercase outside any class to make it easier to
+# convert from the Perl script.
+#
+# As those are evaluated at the beginning, no need to cache them
+#
+
+# Start of a kernel-doc comment: "/**" at column 0, trailing whitespace allowed.
+doc_start = KernRe(r'^/\*\*\s*$', cache=False)
+
+doc_end = KernRe(r'\*/', cache=False)            # comment terminator
+doc_com = KernRe(r'\s*\*\s*', cache=False)       # "*" with any whitespace around it
+doc_com_body = KernRe(r'\s*\* ?', cache=False)   # "*" followed by at most one space
+doc_decl = doc_com + KernRe(r'(\w+)', cache=False)  # NOTE(review): "+" presumably concatenates patterns
+
+# @params and a strictly limited set of supported section names
+# Specifically:
+#   Match @word:
+#         @...:
+#         @{section-name}:
+# while trying to not match literal block starts like "example::"
+#
+known_section_names = 'description|context|returns?|notes?|examples?'
+known_sections = KernRe(known_section_names, flags = re.I)
+doc_sect = doc_com + \
+    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
+           flags=re.I, cache=False)
+
+doc_content = doc_com_body + KernRe(r'(.*)', cache=False)   # captures the text after the "*" prefix
+doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)    # "/**" alone on a line, may be indented
+doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
+doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)        # "*/" alone on a line
+doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False)
+
+export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
+export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)
+
+type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
+
+#
+# Tests for the beginning of a kerneldoc block in its various forms.
+#
+doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
+doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False)
+doc_begin_func = KernRe(str(doc_com) +			# initial " * "
+                        r"(?:\w+\s*\*\s*)?" + 		# type (not captured)
+                        r'(?:define\s+)?' + 		# possible "define" (not captured)
+                        r'(\w+)\s*(?:\(\w*\))?\s*' +	# name and optional "(...)"
+                        r'(?:[-:].*)?$',		# description (not captured)
+                        cache = False)
+
+#
+# Here begins a long set of transformations to turn structure member prefixes
+# and macro invocations into something we can parse and generate kdoc for.
+#
+struct_args_pattern = r'([^,)]+)'  # one macro argument: text up to the next "," or ")"
+
+struct_xforms = [
+    # Strip attributes
+    (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
+    (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
+    (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
+    (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
+    (KernRe(r'\s*__packed\s*', re.S), ' '),
+    (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
+    (KernRe(r'\s*__private', re.S), ' '),
+    (KernRe(r'\s*__rcu', re.S), ' '),
+    (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
+    (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
+    (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
+    #
+    # Unwrap struct_group macros based on this definition:
+    # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
+    # which has variants like: struct_group(NAME, MEMBERS...)
+    # Only MEMBERS arguments require documentation.
+    #
+    # Parsing them happens in two steps:
+    #
+    # 1. drop struct group arguments that aren't part of MEMBERS,
+    #    storing them as STRUCT_GROUP(MEMBERS)
+    #
+    # 2. remove STRUCT_GROUP() ancillary macro.
+    #
+    # The original logic used to remove STRUCT_GROUP() using an
+    # advanced regex:
+    #
+    #   \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
+    #
+    # with two patterns that are incompatible with
+    # Python re module, as it has:
+    #
+    #   - a recursive pattern: (?1)
+    #   - an atomic grouping: (?>...)
+    #
+    # A simpler version was tried, but it didn't work either:
+    #   \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
+    #
+    # as it doesn't properly match the end parenthesis in some cases.
+    #
+    # So, a better solution was crafted: there's now a NestedMatch
+    # class that ensures that delimiters after a search are properly
+    # matched. So, the implementation to drop STRUCT_GROUP() is
+    # handled separately.
+    #
+    (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
+    (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
+    (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
+    (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
+    #
+    # Replace macros
+    #
+    # TODO: use NestedMatch for FOO($1, $2, ...) matches
+    #
+    # it is better to also move those to the NestedMatch logic,
+    # to ensure that parentheses will be properly matched.
+    #
+    (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
+     r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
+    (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
+     r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
+    (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+            re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
+    (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+            re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
+    (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
+            r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+    (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
+            struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+    (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
+            struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
+    (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
+    (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
+]
+#
+# Regexes here are guaranteed to have the end delimiter matching
+# the start delimiter. Yet, right now, only one replace group
+# is allowed.
+#
+struct_nested_prefixes = [
+    (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
+]
+
+# Transforms for function prototypes, as (regex, replacement) pairs.
+# Most entries delete a keyword or annotation; DECL_BUCKET_PARAMS() is
+# expanded into its two arguments. Presumably fed to apply_transforms().
+function_xforms  = [
+    (KernRe(r"^static +"), ""),
+    (KernRe(r"^extern +"), ""),
+    (KernRe(r"^asmlinkage +"), ""),
+    (KernRe(r"^inline +"), ""),
+    (KernRe(r"^__inline__ +"), ""),
+    (KernRe(r"^__inline +"), ""),
+    (KernRe(r"^__always_inline +"), ""),
+    (KernRe(r"^noinline +"), ""),
+    (KernRe(r"^__FORTIFY_INLINE +"), ""),
+    (KernRe(r"__init +"), ""),
+    (KernRe(r"__init_or_module +"), ""),
+    (KernRe(r"__deprecated +"), ""),
+    (KernRe(r"__flatten +"), ""),
+    (KernRe(r"__meminit +"), ""),
+    (KernRe(r"__must_check +"), ""),
+    (KernRe(r"__weak +"), ""),
+    (KernRe(r"__sched +"), ""),
+    (KernRe(r"_noprof"), ""),
+    (KernRe(r"__always_unused *"), ""),
+    (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
+    (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
+    (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
+    (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
+    (KernRe(r"__attribute_const__ +"), ""),
+    (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
+]
+
+#
+# Run each (pattern, replacement) pair over the text, in list order.
+#
+def apply_transforms(xforms, text):
+    for pattern, replacement in xforms:
+        text = pattern.sub(replacement, text)
+    return text
+
+#
+# Strip both ends, then squeeze each run of 2+ whitespace chars to one space.
+multi_space = KernRe(r'\s\s+')
+def trim_whitespace(s):
+    stripped = s.strip()
+    return multi_space.sub(' ', stripped)
+
+#
+# Drop struct/enum members that sit in a region marked "private".
+#
+def trim_private_members(text):
+    # A private region closed by a "/* public: ... */" marker
+    bounded = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S)
+    # A private region that runs to the end of the text
+    unbounded = KernRe(r'/\*\s*private:.*', flags=re.S)
+    # Any remaining comment, together with the whitespace around it
+    comment = KernRe(r'\s*/\*.*?\*/\s*', flags=re.S)
+    # Order matters: bounded regions first, then the open-ended one, and
+    # comments last because the markers above live inside comments.
+    visible = unbounded.sub('', bounded.sub('', text))
+    return comment.sub('', visible).strip()
+
+class state:
+    """
+    Parser state constants, plus a list giving the name of each value
+    """
+
+    # Parser states
+    NORMAL = 0           # normal code
+    NAME = 1             # looking for function name
+    DECLARATION = 2      # We have seen a declaration which might not be done
+    BODY = 3             # the body of the comment
+    SPECIAL_SECTION = 4  # doc section ending with a blank line
+    PROTO = 5            # scanning prototype
+    DOCBLOCK = 6         # documentation block
+    INLINE_NAME = 7      # gathering doc outside main block
+    INLINE_TEXT = 8      # reading the body of inline docs
+
+    name = [
+        "NORMAL",
+        "NAME",
+        "DECLARATION",
+        "BODY",
+        "SPECIAL_SECTION",
+        "PROTO",
+        "DOCBLOCK",
+        "INLINE_NAME",
+        "INLINE_TEXT",
+    ]
+
+
+SECTION_DEFAULT = "Description"  # section used when no title is given
+
+class KernelEntry:
+    """Data gathered while parsing one kernel-doc comment and its declaration."""
+
+    def __init__(self, config, fname, ln):
+        self.config = config
+        self.fname = fname
+
+        self._contents = []
+        self.prototype = ""
+
+        self.warnings = []
+
+        self.parameterlist = []
+        self.parameterdescs = {}
+        self.parametertypes = {}
+        self.parameterdesc_start_lines = {}
+
+        self.section_start_lines = {}
+        self.sections = {}
+        self.section = SECTION_DEFAULT  # section currently being collected
+        self.new_start_line = 0         # line where that section started
+
+        self.anon_struct_union = False
+
+        self.leading_space = None
+
+        # State flags
+        self.brcount = 0
+        self.declaration_start_line = ln + 1
+
+    #
+    # Management of section contents
+    #
+    def add_text(self, text):
+        self._contents.append(text)
+
+    def contents(self):
+        return '\n'.join(self._contents) + '\n'
+
+    # TODO: rename to emit_message after removal of kernel-doc.pl
+    def emit_msg(self, ln, msg, *, warning=True):
+        """Log an info message right away, or queue a warning in self.warnings"""
+
+        log_msg = f"{self.fname}:{ln} {msg}"
+
+        if not warning:
+            self.config.log.info(log_msg)
+            return
+
+        # Delegate warning output to output logic, as this way it
+        # will report warnings/info only for symbols that are output
+
+        self.warnings.append(log_msg)
+
+    #
+    # Begin a new section, optionally dumping the current one first.
+    #
+    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
+        if dump:
+            self.dump_section(start_new = True)
+        self.section = title
+        self.new_start_line = line_no
+
+    def dump_section(self, start_new=True):
+        """
+        Dumps section contents to arrays/hashes intended for that purpose.
+        """
+        #
+        # If we have accumulated no contents in the default ("description")
+        # section, don't bother.
+        #
+        if self.section == SECTION_DEFAULT and not self._contents:
+            return
+        name = self.section
+        contents = self.contents()
+
+        if type_param.match(name):
+            name = type_param.group(1)
+
+            self.parameterdescs[name] = contents
+            self.parameterdesc_start_lines[name] = self.new_start_line
+
+            self.new_start_line = 0
+
+        else:
+            if name in self.sections and self.sections[name] != "":
+                # Only warn on user-specified duplicate section names
+                if name != SECTION_DEFAULT:
+                    self.emit_msg(self.new_start_line,
+                                  f"duplicate section name '{name}'")
+                # Treat as a new paragraph - add a blank line
+                self.sections[name] += '\n' + contents
+            else:
+                self.sections[name] = contents
+                self.section_start_lines[name] = self.new_start_line
+                self.new_start_line = 0
+
+#        self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))
+
+        if start_new:
+            self.section = SECTION_DEFAULT
+            self._contents = []
+
+python_warning = False  # set by KernelDoc.__init__ once the version warning was emitted
+
+class KernelDoc:
+    """
+    Read a C language source or header FILE and extract embedded
+    documentation comments.
+    """
+
+    # Section names
+
+    section_context = "Context"
+    section_return = "Return"
+    # Marker stored as the description of parameters/members that have none
+    undescribed = "-- undescribed --"
+
+    def __init__(self, config, fname):
+        """Set up parser state for a single source file"""
+        global python_warning
+
+        self.config = config
+        self.fname = fname
+
+        # The state machine starts out scanning ordinary code
+        self.state = state.NORMAL
+
+        # No entry is being processed yet
+        self.entry = None
+
+        # Everything that may be output later is collected here
+        self.entries = []
+
+        # The warning below is issued at most once (module-level flag)
+        if python_warning:
+            return
+
+        # Dicts are only guaranteed to keep insertion order from 3.7 on
+        version = sys.version_info
+        if version.major == 3 and version.minor < 7:
+            self.emit_msg(0,
+                          'Python 3.7 or later is required for correct results')
+            python_warning = True
+
+    def emit_msg(self, ln, msg, *, warning=True):
+        """Report a message via the current entry, or log it directly"""
+
+        entry = self.entry
+        if entry:
+            # The entry handles its own messages
+            entry.emit_msg(ln, msg, warning=warning)
+            return
+
+        # No entry: pick the log method matching the severity
+        log = self.config.log
+        report = log.warning if warning else log.info
+        report(f"{self.fname}:{ln} {msg}")
+
+    def dump_section(self, start_new=True):
+        """
+        Forward the section dump to the current entry, if there is one.
+        """
+        entry = self.entry
+        if entry:
+            entry.dump_section(start_new)
+
+    # TODO: rename it to store_declaration after removal of kernel-doc.pl
+    def output_declaration(self, dtype, name, **args):
+        """
+        Wrap the current entry into a KdocItem and queue it in self.entries.
+
+        Nothing is printed here; output and filtering are handled elsewhere.
+        """
+        entry = self.entry
+        item = KdocItem(name, self.fname, dtype,
+                        entry.declaration_start_line, **args)
+        item.warnings = entry.warnings
+
+        # Drop empty sections
+        # TODO: improve empty sections logic to emit warnings
+        sections = entry.sections
+        blank_titles = [title for title in ("Description", "Return")
+                        if title in sections and not sections[title].rstrip()]
+        for title in blank_titles:
+            del sections[title]
+        item.set_sections(sections, entry.section_start_lines)
+        item.set_params(entry.parameterlist, entry.parameterdescs,
+                        entry.parametertypes, entry.parameterdesc_start_lines)
+        self.entries.append(item)
+
+        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
+
+    def reset_state(self, ln):
+        """
+        Start a fresh KernelEntry for line ``ln`` and put the state
+        machine back into its NORMAL state.
+        """
+
+        previous = self.entry
+
+        # Log warnings still pending on the previous entry before dropping it.
+        # NOTE(review): self.entries receives KdocItem objects in
+        # output_declaration() - confirm this membership test can be true.
+        if previous and previous not in self.entries:
+            for pending in previous.warnings:
+                self.config.log.warning(pending)
+
+        self.entry = KernelEntry(self.config, self.fname, ln)
+        self.state = state.NORMAL
+
+    def push_parameter(self, ln, decl_type, param, dtype,
+                       org_arg, declaration_name):
+        """
+        Record one parameter (or struct/union member) in self.entry.
+
+        Stores a description placeholder if needed, appends the name to
+        the parameter list and saves the original argument text as its type.
+        """
+
+        entry = self.entry
+        descs = entry.parameterdescs
+
+        # The closing "}" of an anonymous struct/union is not a member
+        if entry.anon_struct_union and dtype == "" and param == "}":
+            return
+
+        entry.anon_struct_union = False
+
+        # Cut the name at the first "[" or ")"
+        param = KernRe(r'[\[\)].*').sub('', param, count=1)
+
+        #
+        # Typeless parameters cover a few "anonymous type" cases.
+        #
+        if dtype == '':
+            if param.endswith("..."):
+                # Variadic: keep the name given before "...", if any
+                if len(param) > 3:
+                    param = param[:-3]
+                if not descs.get(param):
+                    descs[param] = "variable arguments"
+
+            elif (not param) or param == "void":
+                param = "void"
+                descs[param] = "no arguments"
+
+            elif param in ["struct", "union"]:
+                # Unnamed (anonymous) struct or union
+                dtype = param
+                param = "{unnamed_" + param + "}"
+                descs[param] = "anonymous\n"
+                entry.anon_struct_union = True
+
+        # Anything still lacking a description is marked as undescribed.
+        # Names starting with "#" are inline preprocessor statements rather
+        # than parameters, so they are left alone.
+        if not (param in descs or param.startswith("#")):
+            descs[param] = self.undescribed
+
+            # Names containing a dot do not trigger the warning
+            if "." not in param:
+                kind = "parameter" if decl_type == 'function' else "member"
+                self.emit_msg(ln,
+                              f"{decl_type} {kind} '{param}' not described in '{declaration_name}'")
+
+        # The name goes on the parameter list as it is at this point; the
+        # original argument text is stored as its type, with runs of
+        # whitespace collapsed into single spaces.
+        entry.parameterlist.append(param)
+        entry.parametertypes[param] = KernRe(r'\s\s+').sub(' ', org_arg)
+
+
+    def create_parameter_list(self, ln, decl_type, args,
+                              splitter, declaration_name):
+        """
+        Split args on splitter and hand each parameter found to push_parameter().
+        """
+
+        # Temporarily turn commas inside parentheses into '#' so the split skips them
+        arg_expr = KernRe(r'(\([^\),]+),')
+        while arg_expr.search(args):
+            args = arg_expr.sub(r"\1#", args)
+
+        for arg in args.split(splitter):
+            # Ignore argument attributes
+            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)
+
+            # Strip leading/trailing spaces, then collapse the first whitespace run
+            arg = arg.strip()
+            arg = KernRe(r'\s+').sub(' ', arg, count=1)
+
+            if arg.startswith('#'):
+                # Treat preprocessor directive as a typeless variable just to fill
+                # corresponding data structures "correctly". Catch it later in
+                # output_* subs.
+
+                # Both the type and the original-argument strings are passed empty
+                self.push_parameter(ln, decl_type, arg, "",
+                                    "", declaration_name)
+            #
+            # The pointer-to-function case.
+            #
+            elif KernRe(r'\(.+\)\s*\(').search(arg):
+                arg = arg.replace('#', ',')
+                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
+                           r'([\w\[\].]*)'    # Capture the name and possible [array]
+                           r'\s*\)')          # Make sure the trailing ")" is there
+                if r.match(arg):
+                    param = r.group(1)
+                else:
+                    self.emit_msg(ln, f"Invalid param: {arg}")
+                    param = arg
+                dtype = arg.replace(param, '')
+                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
+            #
+            # The array-of-pointers case.  Dig the parameter name out from the middle
+            # of the declaration.
+            #
+            elif KernRe(r'\(.+\)\s*\[').search(arg):
+                r = KernRe(r'[^\(]+\(\s*\*\s*'          # Up to "(" and maybe "*"
+                           r'([\w.]*?)'                 # The actual pointer name
+                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
+                if r.match(arg):
+                    param = r.group(1)
+                else:
+                    self.emit_msg(ln, f"Invalid param: {arg}")
+                    param = arg
+                dtype = arg.replace(param, '')
+                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
+            elif arg:
+                #
+                # Clean up extraneous spaces and split the string at commas; the first
+                # element of the resulting list will also include the type information.
+                #
+                arg = KernRe(r'\s*:\s*').sub(":", arg)
+                arg = KernRe(r'\s*\[').sub('[', arg)
+                args = KernRe(r'\s*,\s*').split(arg)
+                args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
+                #
+                # args[0] has a string of "type a".  If "a" includes an [array]
+                # declaration, we want to not be fooled by any white space inside
+                # the brackets, so detect and handle that case specially.
+                #
+                r = KernRe(r'^([^[\]]*\s+)(.*)$')
+                if r.match(args[0]):
+                    args[0] = r.group(2)
+                    dtype = r.group(1)
+                else:
+                    # No space in args[0]; this seems wrong but preserves previous behavior
+                    dtype = ''
+
+                bitfield_re = KernRe(r'(.*?):(\w+)')
+                for param in args:
+                    #
+                    # For pointers, shift the star(s) from the variable name to the
+                    # type declaration.
+                    #
+                    r = KernRe(r'^(\*+)\s*(.*)')
+                    if r.match(param):
+                        self.push_parameter(ln, decl_type, r.group(2),
+                                            f"{dtype} {r.group(1)}",
+                                            arg, declaration_name)
+                    #
+                    # Perform a similar shift for bitfields.
+                    #
+                    elif bitfield_re.search(param):
+                        if dtype != "":  # Skip unnamed bit-fields
+                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
+                                                f"{dtype}:{bitfield_re.group(2)}",
+                                                arg, declaration_name)
+                    else:
+                        self.push_parameter(ln, decl_type, param, dtype,
+                                            arg, declaration_name)
+
+    def check_sections(self, ln, decl_name, decl_type):
+        """
+        Warn about every documented section that is neither a declared
+        parameter/member nor one of the known section names.
+        """
+        kind = 'parameter' if decl_type == 'function' else 'member'
+        dname = f"{decl_type} {kind}"
+        for section in self.entry.sections:
+            if section in self.entry.parameterlist:
+                continue
+            if known_sections.search(section):
+                continue
+            self.emit_msg(ln,
+                          f"Excess {dname} '{section}' description in '{decl_name}'")
+
+    def check_return_section(self, ln, declaration_name, return_type):
+        """
+        Warn when a function that returns a value has no "Return" section.
+        Does nothing unless self.config.wreturn is set.
+        """
+        if not self.config.wreturn:
+            return
+        #
+        # An empty return type means a macro, and "void" functions (but not
+        # "void *" ones) return nothing; neither needs a description.
+        if not return_type:
+            return
+        if KernRe(r'void\s*\w*\s*$').search(return_type):
+            return
+        if self.entry.sections.get("Return", None):
+            return
+        self.emit_msg(ln, f"No description found for return value of '{declaration_name}'")
+
+    #
+    # Split apart a structure prototype; returns (struct|union, name, members) or None
+    #
+    def split_struct_proto(self, proto):
+        qualifiers = "|".join(["__attribute__", "__packed", "__aligned",
+                               "____cacheline_aligned_in_smp",
+                               "____cacheline_aligned"])
+        kind = r'(struct|union)'
+        body = r'\{(.*)\}\s*' + f"(?:{qualifiers})?"
+        #
+        # Plain form: "struct name { members }"
+        #
+        plain = KernRe(kind + r'\s+(\w+)\s*' + body)
+        if plain.search(proto):
+            return (plain.group(1), plain.group(2), plain.group(3))
+        #
+        # Typedef form: "typedef struct { members } name;"
+        #
+        tdef = KernRe(r'typedef\s+' + kind + r'\s*' + body + r'\s*(\w+)\s*;')
+        if tdef.search(proto):
+            return (tdef.group(1), tdef.group(3), tdef.group(2))
+        return None
+    #
+    # Rewrite the members of a structure or union for easier formatting later on.
+    # Among other things, this function will turn a member like:
+    #
+    #  struct { inner_members; } foo;
+    #
+    # into:
+    #
+    #  struct foo; inner_members;
+    #
+    def rewrite_struct_members(self, members):
+        #
+        # Process struct/union members from the most deeply nested outward.  The
+        # trick is in the ^{ below - it prevents a match of an outer struct/union
+        # until the inner one has been munged (removing the "{" in the process).
+        #
+        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
+                                r'([^\{\};]+)'      # 1: possible name
+                                r'(\{)'
+                                r'([^\{\}]*)'       # 3: Contents of declaration
+                                r'(\})'
+                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
+        tuples = struct_members.findall(members)
+        while tuples:
+            for t in tuples:
+                newmember = ""
+                oldmember = "".join(t) # Reconstruct the original formatting
+                dtype, content, rest = t[0], t[3], t[5]
+                #
+                # Pass through each field name, normalizing the form and formatting.
+                #
+                for s_id in rest.split(','):
+                    s_id = s_id.strip()
+                    newmember += f"{dtype} {s_id}; "
+                    #
+                    # Remove bitfield/array/pointer info, getting the bare name.
+                    #
+                    s_id = KernRe(r'[:\[].*').sub('', s_id)
+                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
+                    #
+                    # Pass through the members of this inner structure/union.
+                    #
+                    for arg in content.split(';'):
+                        arg = arg.strip()
+                        #
+                        # Look for (type)(*name)(args) - pointer to function
+                        #
+                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
+                        if r.match(arg):
+                            fn_type, fn_name, fn_rest = r.group(1), r.group(2), r.group(3)
+                            # Pointer-to-function; dtype must stay intact for the next s_id
+                            if not s_id:
+                                # Anonymous struct/union
+                                newmember += f"{fn_type}{fn_name}{fn_rest}; "
+                            else:
+                                newmember += f"{fn_type}{s_id}.{fn_name}{fn_rest}; "
+                        #
+                        # Otherwise a non-function member.
+                        #
+                        else:
+                            #
+                            # Remove bitmap and array portions and spaces around commas
+                            #
+                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
+                            arg = KernRe(r'\[.*\]').sub('', arg)
+                            arg = KernRe(r'\s*,\s*').sub(',', arg)
+                            #
+                            # Look for a normal decl - "type name[,name...]"
+                            #
+                            r = KernRe(r'(.*)\s+([\S+,]+)')
+                            if r.search(arg):
+                                for m_name in r.group(2).split(','):
+                                    m_name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', m_name)
+                                    if not s_id:
+                                        # Anonymous struct/union
+                                        newmember += f"{r.group(1)} {m_name}; "
+                                    else:
+                                        newmember += f"{r.group(1)} {s_id}.{m_name}; "
+                            else:
+                                newmember += f"{arg}; "
+                #
+                # At the end of the s_id loop, replace the original declaration with
+                # the munged version.
+                #
+                members = members.replace(oldmember, newmember)
+            #
+            # End of the tuple loop - search again and see if there are outer members
+            # that now turn up.
+            #
+            tuples = struct_members.findall(members)
+        return members
+
+    #
+    # Format the struct declaration into a standard form for inclusion in the
+    # resulting docs.
+    #
+    def format_struct_decl(self, declaration):
+        #
+        # Put a newline after every "{" and ";", and close up "}  ;" into "};".
+        #
+        declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
+        declaration = KernRe(r'\}\s+;').sub('};', declaration)
+        #
+        # Format inline enums with each member on its own line.
+        #
+        r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
+        while r.search(declaration):
+            declaration = r.sub(r'\1,\n\2', declaration)
+        #
+        # Now go through and supply the right number of tabs for each line;
+        # clauses starting with '#' get only the single fixed tab.
+        #
+        def_args = declaration.split('\n')
+        level = 1
+        declaration = ""
+        for clause in def_args:
+            clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
+            if clause:
+                if '}' in clause and level > 1:
+                    level -= 1
+                if not clause.startswith('#'):
+                    declaration += "\t" * level
+                declaration += "\t" + clause + "\n"
+                if "{" in clause and "}" not in clause:
+                    level += 1
+        return declaration
+
+
+    def dump_struct(self, ln, proto):
+        """
+        Parse a struct or union prototype and store an entry for it.
+        """
+        #
+        # Do the basic parse to get the pieces of the declaration.
+        #
+        struct_parts = self.split_struct_proto(proto)
+        if not struct_parts:
+            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
+            return
+        decl_type, declaration_name, members = struct_parts
+
+        if self.entry.identifier != declaration_name:
+            self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. "
+                          f"Prototype was for {decl_type} {declaration_name} instead\n")
+            return
+        #
+        # Go through the list of members applying all of our transformations.
+        #
+        members = trim_private_members(members)
+        members = apply_transforms(struct_xforms, members)
+
+        nested = NestedMatch()
+        for search, sub in struct_nested_prefixes:
+            members = nested.sub(search, sub, members)
+        #
+        # Keep the text as it is now for display, then flatten embedded
+        # struct/union members and drop leftover {...} bodies (e.g. enums).
+        declaration = members
+        members = self.rewrite_struct_members(members)
+        members = re.sub(r'(\{[^\{\}]*\})', '', members)
+        #
+        # Build the member list, check the documented sections, store the result.
+        #
+        self.create_parameter_list(ln, decl_type, members, ';',
+                                   declaration_name)
+        self.check_sections(ln, declaration_name, decl_type)
+        self.output_declaration(decl_type, declaration_name,
+                                definition=self.format_struct_decl(declaration),
+                                purpose=self.entry.declaration_purpose)
+
+    def dump_enum(self, ln, proto):
+        """
+        Parse an enum prototype, check its documented values and store it.
+        """
+        #
+        # Strip preprocessor directives.  Note that this depends on the
+        # trailing semicolon we added in process_proto_type().
+        #
+        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
+        #
+        # Parse out the name and members of the enum.  Typedef form first.
+        #
+        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
+        if r.search(proto):
+            declaration_name = r.group(2)
+            members = trim_private_members(r.group(1))
+        #
+        # Failing that, look for a straight enum
+        #
+        else:
+            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
+            if r.match(proto):
+                declaration_name = r.group(1)
+                members = trim_private_members(r.group(2))
+        #
+        # OK, this isn't going to work.
+        #
+            else:
+                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
+                return
+        #
+        # Make sure we found what we were expecting.
+        #
+        if self.entry.identifier != declaration_name:
+            if self.entry.identifier == "":
+                self.emit_msg(ln,
+                              f"{proto}: wrong kernel-doc identifier on prototype")
+            else:
+                self.emit_msg(ln,
+                              f"expecting prototype for enum {self.entry.identifier}. "
+                              f"Prototype was for enum {declaration_name} instead")
+            return
+
+        if not declaration_name:
+            declaration_name = "(anonymous)"
+        #
+        # Drop parenthesized text, then parse out the name of each enum
+        # member and verify that we have a description for it.
+        #
+        member_set = set()
+        members = KernRe(r'\([^;)]*\)').sub('', members)
+        for arg in members.split(','):
+            if not arg:
+                continue
+            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+            self.entry.parameterlist.append(arg)
+            if arg not in self.entry.parameterdescs:
+                self.entry.parameterdescs[arg] = self.undescribed
+                self.emit_msg(ln,
+                              f"Enum value '{arg}' not described in enum '{declaration_name}'")
+            member_set.add(arg)
+        #
+        # Ensure that every described member actually exists in the enum.
+        #
+        for k in self.entry.parameterdescs:
+            if k not in member_set:
+                self.emit_msg(ln,
+                              f"Excess enum value '@{k}' description in '{declaration_name}'")
+
+        self.output_declaration('enum', declaration_name,
+                                purpose=self.entry.declaration_purpose)
+
+    def dump_declaration(self, ln, prototype):
+        """
+        Dispatch a data declaration to the dump_*() method for its type.
+        """
+
+        if self.entry.decl_type == "enum":
+            self.dump_enum(ln, prototype)
+        elif self.entry.decl_type == "typedef":
+            self.dump_typedef(ln, prototype)
+        elif self.entry.decl_type in ["union", "struct"]:
+            self.dump_struct(ln, prototype)
+        else:
+            # This would be a bug; report it through emit_msg() like every other message
+            self.emit_msg(ln, f'Unknown declaration type: {self.entry.decl_type}')
+
+    def dump_function(self, ln, prototype):
+        """
+        Parse a function or macro prototype, check its docs and store it.
+        """
+
+        found = func_macro = False
+        return_type = ''
+        decl_type = 'function'
+        #
+        # Apply the initial transformations.
+        #
+        prototype = apply_transforms(function_xforms, prototype)
+        #
+        # If we have a macro, remove the "#define" at the front.
+        #
+        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
+        if new_proto != prototype:
+            prototype = new_proto
+            #
+            # Dispense with the simple "#define A B" case here; the key
+            # is the space after the name of the symbol being defined.
+            # NOTE that the seemingly misnamed "func_macro" indicates a
+            # macro *without* arguments.
+            #
+            r = KernRe(r'^(\w+)\s+')
+            if r.search(prototype):
+                return_type = ''
+                declaration_name = r.group(1)
+                func_macro = True
+                found = True
+
+        # Yes, this truly is vile.  We are looking for:
+        # 1. Return type (may be nothing if we're looking at a macro)
+        # 2. Function name
+        # 3. Function parameters.
+        #
+        # All the while we have to watch out for function pointer parameters
+        # (which IIRC is what the two sections are for), C types (these
+        # regexps don't even start to express all the possibilities), and
+        # so on.
+        #
+        # If you mess with these regexps, it's a good idea to check that
+        # the following functions' documentation still comes out right:
+        # - parport_register_device (function pointer parameters)
+        # - atomic_set (macro)
+        # - pci_match_device, __copy_to_user (long return type)
+
+        name = r'\w+'
+        type1 = r'(?:[\w\s]+)?'
+        type2 = r'(?:[\w\s]+\*+)+'
+        #
+        # Attempt to match first on (args) with no internal parentheses; this
+        # lets us easily filter out __acquires() and other post-args stuff.  If
+        # that fails, just grab the rest of the line to the last closing
+        # parenthesis.
+        #
+        proto_args = r'\(([^\(]*|.*)\)'
+        #
+        # (Except for the simple macro case) attempt to split up the prototype
+        # in the various ways we understand; the first pattern to match wins.
+        #
+        if not found:
+            patterns = [
+                rf'^()({name})\s*{proto_args}',
+                rf'^({type1})\s+({name})\s*{proto_args}',
+                rf'^({type2})\s*({name})\s*{proto_args}',
+            ]
+
+            for p in patterns:
+                r = KernRe(p)
+                if r.match(prototype):
+                    return_type = r.group(1)
+                    declaration_name = r.group(2)
+                    args = r.group(3)
+                    self.create_parameter_list(ln, decl_type, args, ',',
+                                               declaration_name)
+                    found = True
+                    break
+        #
+        # Parsing done; make sure that things are as we expect.
+        #
+        if not found:
+            self.emit_msg(ln,
+                          f"cannot understand function prototype: '{prototype}'")
+            return
+        if self.entry.identifier != declaration_name:
+            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
+                          f"Prototype was for {declaration_name}() instead")
+            return
+        self.check_sections(ln, declaration_name, "function")
+        self.check_return_section(ln, declaration_name, return_type)
+        #
+        # Store the result.
+        #
+        self.output_declaration(decl_type, declaration_name,
+                                typedef=('typedef' in return_type),
+                                functiontype=return_type,
+                                purpose=self.entry.declaration_purpose,
+                                func_macro=func_macro)
+
+
+    def dump_typedef(self, ln, proto):
+        """
+        Parse a typedef prototype (function or simple form) and store it.
+        """
+        #
+        # We start by looking for function typedefs.
+        #
+        typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
+        typedef_ident = r'\*?\s*(\w\S+)\s*'
+        typedef_args = r'\s*\((.*)\);'
+
+        typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
+        typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)
+
+        # Parse function typedef prototypes; the parenthesized-name form is tried first
+        for r in [typedef1, typedef2]:
+            if not r.match(proto):
+                continue
+
+            return_type = r.group(1).strip()
+            declaration_name = r.group(2)
+            args = r.group(3)
+
+            if self.entry.identifier != declaration_name:
+                self.emit_msg(ln,
+                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
+                return
+
+            self.create_parameter_list(ln, 'function', args, ',', declaration_name)
+
+            self.output_declaration('function', declaration_name,
+                                    typedef=True,
+                                    functiontype=return_type,
+                                    purpose=self.entry.declaration_purpose)
+            return
+        #
+        # Not a function, try to parse a simple typedef.
+        #
+        r = KernRe(r'typedef.*\s+(\w+)\s*;')
+        if r.match(proto):
+            declaration_name = r.group(1)
+
+            if self.entry.identifier != declaration_name:
+                self.emit_msg(ln,
+                              f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
+                return
+
+            self.output_declaration('typedef', declaration_name,
+                                    purpose=self.entry.declaration_purpose)
+            return
+
+        self.emit_msg(ln, "error: Cannot parse typedef!")
+
+    @staticmethod
+    def process_export(function_set, line):
+        """
+        Record the symbol named by an EXPORT_SYMBOL* line in function_set.
+
+        Returns True when the line held an export and False otherwise.  No
+        instance state is used, hence the staticmethod decorator.
+        """
+
+        # Some exported symbols are documented under a different name; the
+        # suffixes listed here are cut off before recording.  A horrible hack.
+        trimmed_suffixes = [ '_noprof' ]
+
+        # Only one EXPORT_SYMBOL* per line is recognized, as having
+        # multiple export lines would violate Kernel coding style.
+
+        for matcher in (export_symbol, export_symbol_ns):
+            if matcher.search(line):
+                symbol = matcher.group(2)
+                break
+        else:
+            return False
+        #
+        # Found an export, trim out any special suffixes
+        #
+        for tail in trimmed_suffixes:
+            # Manual slicing keeps this working on Python < 3.9
+            if symbol.endswith(tail):
+                symbol = symbol[:-len(tail)]
+        function_set.add(symbol)
+        return True
+
+    def process_normal(self, ln, line):
+        """
+        STATE_NORMAL: wait for the "/**" that opens a kernel-doc comment.
+        """
+        if doc_start.match(line):
+            #
+            # A comment is starting: set up a fresh entry.  The next line
+            # is always expected to carry the name being documented.
+            #
+            self.reset_state(ln)
+            self.state = state.NAME
+        # Any other line is ignored by this state, and the state is left
+        # unchanged.
+
+    def process_name(self, ln, line):
+        """
+        STATE_NAME: Looking for the "name - description" line
+        """
+        #
+        # Check for a DOC: block; an untitled one is named "Introduction".
+        #
+        if doc_block.search(line):
+
+            if not doc_block.group(1):
+                self.entry.begin_section(ln, "Introduction")
+            else:
+                self.entry.begin_section(ln, doc_block.group(1))
+
+            self.entry.identifier = self.entry.section
+            self.state = state.DOCBLOCK
+        #
+        # Otherwise we're looking for a normal kerneldoc declaration line.
+        #
+        elif doc_decl.search(line):
+            self.entry.identifier = doc_decl.group(1)
+
+            # Test for data declaration
+            if doc_begin_data.search(line):
+                self.entry.decl_type = doc_begin_data.group(1)
+                self.entry.identifier = doc_begin_data.group(2)
+            #
+            # Look for a function description
+            #
+            elif doc_begin_func.search(line):
+                self.entry.identifier = doc_begin_func.group(1)
+                self.entry.decl_type = "function"
+            #
+            # We struck out.
+            #
+            else:
+                self.emit_msg(ln,
+                              f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")
+                self.state = state.NORMAL
+                return
+            #
+            # OK, set up for a new kerneldoc entry.
+            #
+            self.state = state.BODY
+            self.entry.identifier = self.entry.identifier.strip(" ")
+            # If there are no @param blocks, a default section must be set up here
+            self.entry.begin_section(ln + 1)
+            #
+            # Find the description portion, which *should* be there but
+            # isn't always.
+            # (We should be able to capture this from the previous parsing - someday)
+            #
+            r = KernRe("[-:](.*)")
+            if r.search(line):
+                self.entry.declaration_purpose = trim_whitespace(r.group(1))
+                self.state = state.DECLARATION
+            else:
+                self.entry.declaration_purpose = ""
+
+            if not self.entry.declaration_purpose and self.config.wshort_desc:
+                self.emit_msg(ln,
+                              f"missing initial short description on line:\n{line}")
+
+            if not self.entry.identifier and self.entry.decl_type != "enum":
+                self.emit_msg(ln,
+                              f"wrong kernel-doc identifier on line:\n{line}")
+                self.state = state.NORMAL
+
+            if self.config.verbose:
+                self.emit_msg(ln,
+                              f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
+                                  warning=False)
+        #
+        # Failed to find an identifier.  Emit a warning; the state is not changed.
+        #
+        else:
+            self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
+
+    #
+    # Helper function to determine if a new section is being started.
+    #
+    def is_new_section(self, ln, line):
+        if not doc_sect.search(line):
+            return False
+        #
+        # Pick out the name of our new section, tweaking it if need be.
+        # Context, Return and "@" sections switch to SPECIAL_SECTION.
+        #
+        self.state = state.BODY
+        newsection = doc_sect.group(1)
+        lowered = newsection.lower()
+        if lowered == 'description':
+            newsection = 'Description'
+        elif lowered == 'context':
+            newsection = 'Context'
+            self.state = state.SPECIAL_SECTION
+        elif lowered in ["@return", "@returns", "return", "returns"]:
+            newsection = "Return"
+            self.state = state.SPECIAL_SECTION
+        elif newsection[0] == '@':
+            self.state = state.SPECIAL_SECTION
+        #
+        # Initialize the contents (an absent text group becomes an empty
+        # string), and get the new section going.
+        #
+        newcontents = doc_sect.group(2) or ""
+        self.dump_section()
+        self.entry.begin_section(ln, newsection)
+        self.entry.leading_space = None
+
+        self.entry.add_text(newcontents.lstrip())
+        return True
+
+    #
+    # Helper function to detect (and effect) the end of a kerneldoc comment.
+    #
+    def is_comment_end(self, ln, line):
+        if not doc_end.search(line):
+            return False
+        self.dump_section()
+        # Text glued directly onto the closing "*/" (doc_com + <text> +
+        # doc_end) looks like a mistake, so complain about it.
+        if KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/').match(line):
+            self.emit_msg(ln, f"suspicious ending line: {line}")
+        #
+        # Start with an empty prototype on the line after the comment.
+        #
+        self.entry.prototype = ""
+        self.entry.new_start_line = ln + 1
+        self.state = state.PROTO
+        return True
+
+
+    def process_decl(self, ln, line):
+        """
+        STATE_DECLARATION: soaking up the rest of the short description.
+        """
+        if self.is_new_section(ln, line):
+            return
+        if self.is_comment_end(ln, line):
+            return
+        #
+        # Lines lacking the " * " beginning are reported and skipped.
+        #
+        if not doc_content.search(line):
+            self.emit_msg(ln, f"bad line: {line}")
+            return
+        text = doc_content.group(1)
+        #
+        # A blank line means that we have moved out of the declaration
+        # part of the comment (without any "special section" parameter
+        # descriptions).
+        if text == "":
+            self.state = state.BODY
+            return
+        #
+        # Otherwise we have more of the declaration section to soak up.
+        #
+        purpose = self.entry.declaration_purpose + ' ' + text
+        self.entry.declaration_purpose = trim_whitespace(purpose)
+
+
+    def process_special(self, ln, line):
+        """
+        STATE_SPECIAL_SECTION: a section ending with a blank line
+        """
+        #
+        # If we have hit a blank line (only the " * " marker), then this
+        # section is done.
+        #
+        if KernRe(r"\s*\*\s*$").match(line):
+            self.entry.begin_section(ln, dump = True)
+            self.state = state.BODY
+            return
+        #
+        # Not a blank line, look for the other ways to end the section.
+        #
+        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
+            return
+        #
+        # OK, we should have a continuation of the text for this section.
+        #
+        if doc_content.search(line):
+            cont = doc_content.group(1)
+            #
+            # If the lines of text after the first in a special section have
+            # leading white space, we need to trim it out or Sphinx will get
+            # confused.  For the second line (the None case), see what we
+            # find there and remember it.
+            #
+            if self.entry.leading_space is None:
+                r = KernRe(r'^(\s+)')
+                if r.match(cont):
+                    self.entry.leading_space = len(r.group(1))
+                else:
+                    self.entry.leading_space = 0
+            #
+            # Before trimming any leading chars, be *sure* that they are
+            # spaces; if not, shrink leading_space to the first non-space
+            # column (the new value is remembered in the entry).
+            #
+            for i in range(0, self.entry.leading_space):
+                if cont[i] != " ":
+                    self.entry.leading_space = i
+                    break
+            #
+            # Add the trimmed result to the section and we're done.
+            #
+            self.entry.add_text(cont[self.entry.leading_space:])
+        else:
+            # Unknown line: report it and otherwise ignore it
+            self.emit_msg(ln, f"bad line: {line}")
+
+    def process_body(self, ln, line):
+        """
+        STATE_BODY: handle one line from the bulk of a kerneldoc comment.
+        """
+        # If either helper reports a hit (new section header or end of the
+        # comment), there is nothing more to do for this line.
+        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
+            return
+        if not doc_content.search(line):
+            # Not recognizable as comment text: complain and carry on
+            self.emit_msg(ln, f"bad line: {line}")
+            return
+        self.entry.add_text(doc_content.group(1))
+
+    def process_inline_name(self, ln, line):
+        """STATE_INLINE_NAME: beginning of docbook comments within a prototype."""
+        # The patterns are tried in order; the first one that matches wins.
+        if doc_inline_sect.search(line):
+            self.entry.begin_section(ln, doc_inline_sect.group(1))
+            self.entry.add_text(doc_inline_sect.group(2).lstrip())
+            self.state = state.INLINE_TEXT
+        elif doc_inline_end.search(line):
+            self.dump_section()
+            self.state = state.PROTO
+        elif doc_content.search(line):
+            self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
+            self.state = state.PROTO
+        # Any other line is skipped without a message; the state is unchanged.
+
+    def process_inline_text(self, ln, line):
+        """STATE_INLINE_TEXT: gather the text of a doc comment inside a prototype."""
+        if doc_inline_end.search(line):
+            # Comment closed: flush the section and go back to the prototype
+            self.dump_section()
+            self.state = state.PROTO
+            return
+        if doc_content.search(line):
+            self.entry.add_text(doc_content.group(1))
+
+    def syscall_munge(self, ln, proto):         # pylint: disable=W0613
+        """
+        Handle syscall definitions: rewrite a SYSCALL_DEFINEn(...) line as a
+        "long sys_<name>(...)" prototype and return the resulting string.
+
+        ln is not used here (hence the pylint waiver on the def line).
+        """
+
+        # Fold every run of CR/LF characters into a single blank
+        proto = re.sub(r'[\r\n]+', ' ', proto)
+
+        # SYSCALL_DEFINE0 marks the prototype as void
+        is_void = 'SYSCALL_DEFINE0' in proto
+
+        # Swap the macro name and its '(' for the return type and sys_ prefix
+        proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)
+
+        # With a comma after the name, the first comma becomes the opening
+        # parenthesis; failing that, a void syscall gets "(void)" for its ')'.
+        if KernRe(r'long\s+(sys_.*?),').search(proto):
+            proto = KernRe(',').sub('(', proto, count=1)
+        elif is_void:
+            proto = KernRe(r'\)').sub('(void)', proto, count=1)
+
+        # The comma clean-up below is skipped for void syscalls
+        if is_void:
+            return proto
+
+        # Now blank out the odd-numbered commas (1st, 3rd, ...) so that
+        # argument types & names don't have a comma between them; the
+        # even-numbered ones, which separate the arguments, are kept.
+        pieces = proto.split(',')
+        munged = pieces[0]
+        for nth, piece in enumerate(pieces[1:], start=1):
+            separator = ' ' if nth % 2 == 1 else ','
+            munged += separator + piece
+
+        return munged
+
+    def tracepoint_munge(self, ln, proto):
+        """
+        Handle tracepoint definitions: build a trace_<name>(<args>) prototype.
+        """
+
+        tracepointname = None
+        tracepointargs = None
+
+        # Try each macro form in turn; if several match, the last one wins
+        r = KernRe(r'TRACE_EVENT\((.*?),')
+        if r.search(proto):
+            tracepointname = r.group(1)
+
+        r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),')
+        if r.search(proto):
+            tracepointname = r.group(1)
+        # DEFINE_EVENT carries the name as its second argument
+        r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),')
+        if r.search(proto):
+            tracepointname = r.group(2)
+
+        if tracepointname:
+            tracepointname = tracepointname.lstrip()
+        # The arguments are whatever TP_PROTO(...) encloses, up to the first ')'
+        r = KernRe(r'TP_PROTO\((.*?)\)')
+        if r.search(proto):
+            tracepointargs = r.group(1)
+        # Missing name or arguments: warn and hand proto back untouched
+        if not tracepointname or not tracepointargs:
+            self.emit_msg(ln,
+                          f"Unrecognized tracepoint format:\n{proto}\n")
+        else:
+            proto = f"static inline void trace_{tracepointname}({tracepointargs})"
+            self.entry.identifier = f"trace_{self.entry.identifier}"
+
+        return proto
+
+    def process_proto_function(self, ln, line):
+        """Ancillary routine to process one line of a function prototype"""
+        # Strip C99-style comments to end of line.  re.S has to be passed as
+        # flags=: the second positional parameter of KernRe is "cache".
+        line = KernRe(r"//.*$", flags=re.S).sub('', line)
+        #
+        # Soak up the line's worth of prototype text, stopping at { or ; if present.
+        #
+        if KernRe(r'\s*#\s*define').match(line):
+            self.entry.prototype = line
+        elif not line.startswith('#'):   # skip other preprocessor stuff
+            r = KernRe(r'([^\{]*)')
+            if r.match(line):
+                self.entry.prototype += r.group(1) + " "
+        #
+        # If we now have the whole prototype, clean it up and declare victory.
+        #
+        if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line):
+            # strip each /* ... */ comment (non-greedy) and surrounding spaces
+            self.entry.prototype = KernRe(r'/\*.*?\*/').sub('', self.entry.prototype).strip()
+            #
+            # Handle prototypes for function pointers like:
+            #       int (*pcs_config)(struct foo)
+            # by turning it into
+            #       int pcs_config(struct foo)
+            #
+            r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
+            self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)
+            #
+            # Handle special declaration syntaxes
+            #
+            if 'SYSCALL_DEFINE' in self.entry.prototype:
+                self.entry.prototype = self.syscall_munge(ln,
+                                                          self.entry.prototype)
+            else:
+                r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
+                if r.search(self.entry.prototype):
+                    self.entry.prototype = self.tracepoint_munge(ln,
+                                                                 self.entry.prototype)
+            #
+            # ... and we're done
+            #
+            self.dump_function(ln, self.entry.prototype)
+            self.reset_state(ln)
+
+    def process_proto_type(self, ln, line):
+        """Ancillary routine to process one line of a type declaration"""
+        # Strip C99-style comments and surrounding whitespace.  re.S goes in
+        # as flags=: the second positional parameter of KernRe is "cache".
+        line = KernRe(r"//.*$", flags=re.S).sub('', line).strip()
+        if not line:
+            return # nothing to see here
+
+        # To distinguish preprocessor directive from regular declaration later.
+        if line.startswith('#'):
+            line += ";"
+        #
+        # Split the declaration on any of { } or ;, and accumulate pieces
+        # until we hit a semicolon while not inside {brackets}
+        #
+        r = KernRe(r'(.*?)([{};])')
+        for chunk in r.split(line):
+            if chunk:  # Ignore empty matches
+                self.entry.prototype += chunk
+                #
+                # This cries out for a match statement ... someday after we can
+                # drop Python 3.9 ...
+                #
+                if chunk == '{':
+                    self.entry.brcount += 1
+                elif chunk == '}':
+                    self.entry.brcount -= 1
+                elif chunk == ';' and self.entry.brcount <= 0:
+                    self.dump_declaration(ln, self.entry.prototype)
+                    self.reset_state(ln)
+                    return
+        #
+        # We hit the end of the line while still in the declaration; put
+        # in a space to represent the newline.
+        #
+        self.entry.prototype += ' '
+
+    def process_proto(self, ln, line):
+        """STATE_PROTO: reading a function/whatever prototype."""
+        # A complete one-line inline comment is stored as a section at once.
+        if doc_inline_oneline.search(line):
+            self.entry.begin_section(ln, doc_inline_oneline.group(1))
+            self.entry.add_text(doc_inline_oneline.group(2))
+            self.dump_section()
+        # An inline comment is opening: switch to the INLINE_NAME state.
+        elif doc_inline_start.search(line):
+            self.state = state.INLINE_NAME
+        # Anything else is prototype text, routed by the declaration type.
+        elif self.entry.decl_type == 'function':
+            self.process_proto_function(ln, line)
+
+        else:
+            self.process_proto_type(ln, line)
+
+    def process_docblock(self, ln, line):
+        """STATE_DOCBLOCK: handle one line found inside a DOC: block."""
+        if not doc_end.search(line):
+            # Still inside the block: keep whatever text the line carries
+            if doc_content.search(line):
+                self.entry.add_text(doc_content.group(1))
+            return
+        self.dump_section()
+        self.output_declaration("doc", self.entry.identifier)
+        self.reset_state(ln)
+
+    def parse_export(self):
+        """
+        Scan one kernel source file for EXPORT_SYMBOL* macros.
+
+        Every line of self.fname is handed to process_export() together
+        with a set; that set is returned, or None if an IOError is raised.
+        """
+        symbols = set()
+        try:
+            with open(self.fname, "r", encoding="utf8",
+                      errors="backslashreplace") as src:
+                for text in src:
+                    self.process_export(symbols, text)
+        except IOError:
+            # Tell the caller about the failure with None
+            return None
+
+        return symbols
+
+    #
+    # The state/action table: for each state, the handler to invoke.  The
+    # entries are plain functions, so parse_kdoc() passes self explicitly.
+    #
+    state_actions = {
+        state.NORMAL:			process_normal,
+        state.NAME:			process_name,
+        state.BODY:			process_body,
+        state.DECLARATION:		process_decl,
+        state.SPECIAL_SECTION:		process_special,
+        state.INLINE_NAME:		process_inline_name,
+        state.INLINE_TEXT:		process_inline_text,
+        state.PROTO:			process_proto,
+        state.DOCBLOCK:			process_docblock,
+        }
+
+    def parse_kdoc(self):
+        """
+        Open and process each line of a C source file.
+        The parsing is controlled via a state machine, and the line is passed
+        to a different process function depending on the state. The process
+        function may update the state as needed.
+        Besides parsing kernel-doc tags, it also parses export symbols.
+        Returns the tuple (export_table, self.entries).
+        """
+
+        prev = ""
+        prev_ln = None
+        export_table = set()
+
+        try:
+            with open(self.fname, "r", encoding="utf8",
+                      errors="backslashreplace") as fp:
+                for ln, line in enumerate(fp):
+                    # NOTE(review): ln is zero-based (enumerate default) - confirm
+                    line = line.expandtabs().strip("\n")
+
+                    # Group backslash-continued lines on prototypes into one line
+                    if self.state == state.PROTO:
+                        if line.endswith("\\"):
+                            prev += line.rstrip("\\")
+                            if not prev_ln:
+                                prev_ln = ln
+                            continue
+                        # Continuation over: use the joined text and saved ln
+                        if prev:
+                            ln = prev_ln
+                            line = prev + line
+                            prev = ""
+                            prev_ln = None
+
+                    self.config.log.debug("%d %s: %s",
+                                          ln, state.name[self.state],
+                                          line)
+
+                    # This is an optimization over the original script.
+                    # There, when export_file was used for the same file,
+                    # it was read twice. Here, we use the already-existing
+                    # loop to parse exported symbols as well.
+                    #
+                    if (self.state != state.NORMAL) or \
+                       not self.process_export(export_table, line):
+                        # Hand this line to the appropriate state handler
+                        self.state_actions[self.state](self, ln, line)
+        # An OSError raised anywhere inside the loop is reported here as well
+        except OSError:
+            self.config.log.error(f"Error: Cannot open file {self.fname}")
+
+        return export_table, self.entries
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
new file mode 100644
index 000000000000..2dfa1bf83d64
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Regular expression ancillary classes.
+
+Those help caching regular expressions and do matching for kernel-doc.
+"""
+
+import re
+
+# Local cache for regular expressions, keyed by (pattern, flags)
+re_cache = {}
+
+
+class KernRe:
+    """
+    Helper class to simplify regex declaration and usage.
+
+    It calls re.compile for a given pattern. It also allows adding
+    regular expressions and define sub at class init time.
+
+    Regular expressions can be cached via an argument, helping to speedup
+    searches.
+    """
+
+    def _add_regex(self, string, flags):
+        """
+        Adds a new regex or reuses the one cached for the same pattern and flags.
+        """
+        self.regex = re_cache.get((string, flags), None)
+        if self.regex is None:
+            self.regex = re.compile(string, flags=flags)
+            if self.cache:
+                re_cache[(string, flags)] = self.regex
+
+    def __init__(self, string, cache=True, flags=0):
+        """
+        Compile a regular expression and initialize internal vars.
+        """
+
+        self.cache = cache
+        self.last_match = None
+
+        self._add_regex(string, flags)
+
+    def __str__(self):
+        """
+        Return the regular expression pattern.
+        """
+        return self.regex.pattern
+
+    def __add__(self, other):
+        """
+        Allows adding two regular expressions into one.
+        """
+
+        return KernRe(str(self) + str(other), cache=self.cache or other.cache,
+                  flags=self.regex.flags | other.regex.flags)
+
+    def match(self, string):
+        """
+        Handles a re.match storing its results
+        """
+
+        self.last_match = self.regex.match(string)
+        return self.last_match
+
+    def search(self, string):
+        """
+        Handles a re.search storing its results
+        """
+
+        self.last_match = self.regex.search(string)
+        return self.last_match
+
+    def findall(self, string):
+        """
+        Alias to re.findall
+        """
+
+        return self.regex.findall(string)
+
+    def split(self, string):
+        """
+        Alias to re.split
+        """
+
+        return self.regex.split(string)
+
+    def sub(self, sub, string, count=0):
+        """
+        Alias to re.sub
+        """
+
+        return self.regex.sub(sub, string, count=count)
+
+    def group(self, num):
+        """
+        Returns group num of the last match() or search() done on this object
+        """
+
+        return self.last_match.group(num)
+
+
+class NestedMatch:
+    """
+    Finding nested delimiters is hard with regular expressions. It is
+    even harder on Python with its normal re module, as there are several
+    advanced regular expressions that are missing.
+
+    This is the case of this pattern:
+
+            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
+
+    which is used to properly match open/close parentheses of the
+    string search STRUCT_GROUP(),
+
+    Add a class that counts pairs of delimiters, using it to match and
+    replace nested expressions.
+
+    The original approach was suggested by:
+        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+
+    Although I re-implemented it to make it more generic and match 3 types
+    of delimiters. The logic checks if delimiters are paired. If not, it
+    will ignore the search string.
+    """
+
+    # TODO: make NestedMatch handle multiple match groups
+    #
+    # Right now, regular expressions to match it are defined only up to
+    #       the start delimiter, e.g.:
+    #
+    #       \bSTRUCT_GROUP\(
+    #
+    # is similar to: STRUCT_GROUP\((.*)\)
+    # except that the content inside the match group is delimiter-aligned.
+    #
+    # The content inside parentheses is converted into a single replace
+    # group (e.g. r`\1').
+    #
+    # It would be nice to change such definition to support multiple
+    # match groups, allowing a regex equivalent to:
+    #
+    #   FOO\((.*), (.*), (.*)\)
+    #
+    # it is probably easier to define it not as a regular expression, but
+    # with some lexical definition like:
+    #
+    #   FOO(arg1, arg2, arg3)
+
+    DELIMITER_PAIRS = {
+        '{': '}',
+        '(': ')',
+        '[': ']',
+    }
+
+    RE_DELIM = re.compile(r'[\{\}\[\]\(\)]')
+
+    def _search(self, regex, line):
+        """
+        Finds paired blocks for a regex that ends with a delimiter.
+
+        The suggestion of using finditer to match pairs came from:
+        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+        but I ended using a different implementation to align all three types
+        of delimiters and seek for an initial regular expression.
+
+        The algorithm seeks for open/close paired delimiters and places them
+        into a stack, yielding a start/stop position of each match when the
+        stack is zeroed.
+
+        The algorithm should work fine for properly paired lines, but will
+        silently ignore end delimiters that precede a start delimiter.
+        This should be OK for kernel-doc parser, as unaligned delimiters
+        would cause compilation errors. So, we don't need to raise exceptions
+        to cover such issues.
+        """
+
+        for match_re in regex.finditer(line):
+            start = match_re.start()
+            offset = match_re.end()
+
+            d = line[offset - 1]
+            if d not in self.DELIMITER_PAIRS:
+                continue
+
+            # Each candidate starts from a fresh stack, so an unbalanced
+            # earlier candidate cannot poison the ones that follow it.
+            end = self.DELIMITER_PAIRS[d]
+            stack = [end]
+
+            for match in self.RE_DELIM.finditer(line[offset:]):
+                pos = match.start() + offset
+
+                d = line[pos]
+
+                if d in self.DELIMITER_PAIRS:
+                    end = self.DELIMITER_PAIRS[d]
+
+                    stack.append(end)
+                    continue
+
+                # Does the end delimiter match what is expected?
+                if stack and d == stack[-1]:
+                    stack.pop()
+
+                    if not stack:
+                        yield start, offset, pos + 1
+                        break
+
+    def search(self, regex, line):
+        """
+        This is similar to re.search:
+
+        It matches a regex that it is followed by a delimiter,
+        returning occurrences only if all delimiters are paired.
+        """
+
+        for t in self._search(regex, line):
+
+            yield line[t[0]:t[2]]
+
+    def sub(self, regex, sub, line, count=0):
+        """
+        This is similar to re.sub:
+
+        It matches a regex that it is followed by a delimiter,
+        replacing occurrences only if all delimiters are paired.
+
+        if r'\1' is used, it works just like re: it places there the
+        matched paired data with the delimiter stripped.
+
+        If count is different than zero, it will replace at most count
+        items.
+        """
+        out = ""
+
+        cur_pos = 0
+        n = 0
+
+        for start, end, pos in self._search(regex, line):
+            # A match nested inside an already-replaced span must be skipped
+            if start < cur_pos:
+                continue
+
+            out += line[cur_pos:start]
+
+            # Value, ignoring start/end delimiters
+            value = line[end:pos - 1]
+
+            # replaces \1 at the sub string, if \1 is used there
+            out += sub.replace(r'\1', value)
+
+            # Drop end ';' if any (the match may also end the string)
+            if pos < len(line) and line[pos] == ';':
+                pos += 1
+
+            cur_pos = pos
+            n += 1
+
+            if count and n >= count:
+                break
+
+        # Append the remaining string
+        out += line[cur_pos:]
+
+        return out
diff --git a/tools/lib/python/kdoc/latex_fonts.py b/tools/lib/python/kdoc/latex_fonts.py
new file mode 100755
index 000000000000..29317f8006ea
--- /dev/null
+++ b/tools/lib/python/kdoc/latex_fonts.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) Akira Yokosawa, 2024
+#
+# Ported to Python by (c) Mauro Carvalho Chehab, 2025
+
+"""
+Detect problematic Noto CJK variable fonts.
+
+For "make pdfdocs", reports of build errors of translations.pdf started
+arriving early 2024 [1, 2].  It turned out that Fedora and openSUSE
+tumbleweed have started deploying variable-font [3] format of "Noto CJK"
+fonts [4, 5].  For PDF, a LaTeX package named xeCJK is used for CJK
+(Chinese, Japanese, Korean) pages.  xeCJK requires XeLaTeX/XeTeX, which
+does not (and likely never will) understand variable fonts for historical
+reasons.
+
+The build error happens even when both of variable- and non-variable-format
+fonts are found on the build system.  To make matters worse, Fedora enlists
+variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN,
+-zh_TW, etc.  Hence developers who have interest in CJK pages are more
+likely to encounter the build errors.
+
+This script is invoked from the error path of "make pdfdocs" and emits
+suggestions if variable-font files of "Noto CJK" fonts are in the list of
+fonts accessible from XeTeX.
+
+References:
+[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/
+[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/
+[3]: https://en.wikipedia.org/wiki/Variable_font
+[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts
+[5]: https://build.opensuse.org/request/show/1157217
+
+#===========================================================================
+Workarounds for building translations.pdf
+#===========================================================================
+
+* Denylist "variable font" Noto CJK fonts.
+  - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with
+    tweaks if necessary.  Copy the lines between the dashed rules as-is.
+  - The directory holding fontconfig/fonts.conf ($HOME/deny-vf) can be
+    overridden by setting an env variable FONTS_CONF_DENY_VF.
+
+    * Template:
+-----------------------------------------------------------------
+<?xml version="1.0"?>
+<!DOCTYPE fontconfig SYSTEM "urn:fontconfig:fonts.dtd">
+<fontconfig>
+<!--
+  Ignore variable-font glob (not to break xetex)
+-->
+    <selectfont>
+        <rejectfont>
+            <!--
+                for Fedora
+            -->
+            <glob>/usr/share/fonts/google-noto-*-cjk-vf-fonts</glob>
+            <!--
+                for openSUSE tumbleweed
+            -->
+            <glob>/usr/share/fonts/truetype/Noto*CJK*-VF.otf</glob>
+        </rejectfont>
+    </selectfont>
+</fontconfig>
+-----------------------------------------------------------------
+
+    The denylisting is activated for "make pdfdocs".
+
+* For skipping CJK pages in PDF
+  - Uninstall texlive-xecjk.
+    Denylisting is not needed in this case.
+
+* For printing CJK pages in PDF
+  - Need non-variable "Noto CJK" fonts.
+    * Fedora
+      - google-noto-sans-cjk-fonts
+      - google-noto-serif-cjk-fonts
+    * openSUSE tumbleweed
+      - Non-variable "Noto CJK" fonts are not available as distro packages
+        as of April, 2024.  Fetch a set of font files from upstream Noto
+        CJK Font released at:
+          https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc
+        and at:
+          https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc
+        , then uncompress and deploy them.
+      - Remember to update fontconfig cache by running fc-cache.
+
+!!! Caution !!!
+    Uninstalling "variable font" packages can be dangerous.
+    They might be depended upon by other packages important for your work.
+    Denylisting should be less invasive, as it is effective only while
+    XeLaTeX runs in "make pdfdocs".
+"""
+
+import os
+import re
+import subprocess
+import textwrap
+import sys
+
+class LatexFontChecker:
+    """
+    Detect problems with CJK variable fonts that affect PDF builds for
+    translations.
+    """
+
+    def __init__(self, deny_vf=None):
+        if not deny_vf:
+            deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf")
+        # fc-list is later run with XDG_CONFIG_HOME pointing at that directory
+        self.environ = os.environ.copy()
+        self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf)
+        # Group 1 is the text before the first ':' of a "Noto ... CJK" line
+        self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK")
+
+    def description(self):
+        return __doc__      # i.e. the docstring of this module
+
+    def get_noto_cjk_vf_fonts(self):
+        """Return, sorted, the variable-format Noto CJK entries of fc-list"""
+
+        cjk_fonts = set()
+        cmd = ["fc-list", ":", "file", "family", "variable"]
+        try:
+            result = subprocess.run(cmd,stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE,
+                                    universal_newlines=True,
+                                    env=self.environ,
+                                    check=True)
+        # OSError covers a missing or non-executable fc-list binary
+        except (subprocess.CalledProcessError, OSError) as exc:
+            sys.exit(f"Error running fc-list: {repr(exc)}")
+
+        for line in result.stdout.splitlines():
+            if 'variable=True' not in line:
+                continue
+
+            match = self.re_cjk.search(line)
+            if match:
+                cjk_fonts.add(match.group(1))
+
+        return sorted(cjk_fonts)
+
+    def check(self):
+        """Return a report about problematic CJK fonts, or None if none found"""
+
+        fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), "    ")
+        if not fonts:
+            return None
+
+        # Build the report: banner, font list, hints, closing banner
+
+        msg = "=" * 77 + "\n"
+        msg += 'XeTeX is confused by "variable font" files listed below:\n'
+        msg += fonts + "\n"
+        msg += textwrap.dedent("""
+                For CJK pages in PDF, they need to be hidden from XeTeX by denylisting.
+                Or, CJK pages can be skipped by uninstalling texlive-xecjk.
+
+                For more info on denylisting, other options, and variable font, run:
+
+                    tools/docs/check-variable-fonts.py -h
+            """)
+        msg += "=" * 77
+
+        return msg
diff --git a/tools/docs/lib/parse_data_structs.py b/tools/lib/python/kdoc/parse_data_structs.py
index a5aa2e182052..25361996cd20 100755
--- a/tools/docs/lib/parse_data_structs.py
+++ b/tools/lib/python/kdoc/parse_data_structs.py
@@ -53,11 +53,19 @@ class ParseDataStructs:
 
         replace <type> <old_symbol> <new_reference>
 
-    Replaces how old_symbol with a new reference. The new_reference can be:
+       Replaces old_symbol with a new reference. The new_reference can be:
+
         - A simple symbol name;
         - A full Sphinx reference.
 
-    On both cases, <type> can be:
+    3. Namespace rules
+
+        namespace <namespace>
+
+       Sets C namespace to be used during cross-reference generation. Can
+       be overridden by replace rules.
+
+    On ignore and replace rules, <type> can be:
         - ioctl: for defines that end with _IO*, e.g. ioctl definitions
         - define: for other defines
         - symbol: for symbols defined within enums;
@@ -71,6 +79,8 @@ class ParseDataStructs:
         ignore ioctl VIDIOC_ENUM_FMT
         replace ioctl VIDIOC_DQBUF vidioc_qbuf
         replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+
+        namespace MC
     """
 
     # Parser regexes with multiple ways to capture enums and structs
@@ -140,10 +150,96 @@ class ParseDataStructs:
 
         self.symbols = {}
 
+        self.namespace = None
+        self.ignore = []
+        self.replace = []
+
         for symbol_type in self.DEF_SYMBOL_TYPES:
             self.symbols[symbol_type] = {}
 
-    def store_type(self, symbol_type: str, symbol: str,
+    def read_exceptions(self, fname: str):
+        """Load the ignore/replace/namespace rules from an exceptions file."""
+        if not fname:
+            return
+        # Remembered on self so that apply_exceptions() can name the file too
+        self.exceptions_name = name = os.path.basename(fname)
+        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
+            for ln, line in enumerate(f):
+                ln += 1
+                line = line.strip()
+                if not line or line.startswith("#"):
+                    continue
+
+                # ignore rules
+                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
+
+                if match:
+                    self.ignore.append((ln, match.group(1), match.group(2)))
+                    continue
+
+                # replace rules
+                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
+                if match:
+                    self.replace.append((ln, match.group(1), match.group(2),
+                                         match.group(3)))
+                    continue
+
+                match = re.match(r"^namespace\s+(\S+)", line)
+                if match:
+                    self.namespace = match.group(1)
+                    continue
+
+                sys.exit(f"{name}:{ln}: invalid line: {line}")
+
+    def apply_exceptions(self):
+        """
+        Apply the ignore and replace rules collected by read_exceptions().
+        """
+        name = getattr(self, "exceptions_name", "exceptions")
+        # Handle ignore rules
+        for ln, c_type, symbol in self.ignore:
+            if c_type not in self.DEF_SYMBOL_TYPES:
+                sys.exit(f"{name}:{ln}: {c_type} is invalid")
+
+            d = self.symbols[c_type]
+            if symbol in d:
+                del d[symbol]
+
+        # Handle replace rules
+        for ln, c_type, old, new in self.replace:
+            if c_type not in self.DEF_SYMBOL_TYPES:
+                sys.exit(f"{name}:{ln}: {c_type} is invalid")
+
+            reftype = None
+
+            # Parse reference type when the type is specified
+
+            match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
+            if match:
+                reftype = f":c:{match.group(1)}"
+                new = match.group(2)
+            else:
+                match = re.search(r"(\:ref)\:\`(.+)\`", new)
+                if match:
+                    reftype = match.group(1)
+                    new = match.group(2)
+
+            # If the replacement rule doesn't have a type, get default
+            if not reftype:
+                reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
+                if not reftype:
+                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")
+
+            new_ref = f"{reftype}:`{old} <{new}>`"
+
+            # Change self.symbols to use the replacement rule; the line
+            # number already stored with the symbol is kept as it is.
+            if old in self.symbols[c_type]:
+                (_, sym_ln) = self.symbols[c_type][old]
+                self.symbols[c_type][old] = (new_ref, sym_ln)
+            else:
+                print(f"{name}:{ln}: Warning: can't find {old} {c_type}")
+
+    def store_type(self, ln, symbol_type: str, symbol: str,
                    ref_name: str = None, replace_underscores: bool = True):
         """
         Stores a new symbol at self.symbols under symbol_type.
@@ -157,35 +253,42 @@ class ParseDataStructs:
         ref_type = defs.get("ref_type")
 
         # Determine ref_link based on symbol type
-        if ref_type:
-            if symbol_type == "enum":
-                ref_link = f"{ref_type}:`{symbol}`"
-            else:
-                if not ref_name:
-                    ref_name = symbol.lower()
+        if ref_type or self.namespace:
+            if not ref_name:
+                ref_name = symbol.lower()
+
+            # c-type references don't support hash
+            if ref_type == ":ref" and replace_underscores:
+                ref_name = ref_name.replace("_", "-")
 
-                # c-type references don't support hash
-                if ref_type == ":ref" and replace_underscores:
-                    ref_name = ref_name.replace("_", "-")
+            # C domain references may have namespaces (ref_type can be None here)
+            if ref_type and ref_type.startswith(":c:"):
+                if self.namespace:
+                    ref_name = f"{self.namespace}.{ref_name}"
 
+            if ref_type:
                 ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
+            else:
+                ref_link = f"`{symbol} <{ref_name}>`"
         else:
             ref_link = symbol
 
-        self.symbols[symbol_type][symbol] = f"{prefix}{ref_link}{suffix}"
+        self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)
 
     def store_line(self, line):
         """Stores a line at self.data, properly indented"""
         line = "    " + line.expandtabs()
         self.data += line.rstrip(" ")
 
-    def parse_file(self, file_in: str):
+    def parse_file(self, file_in: str, exceptions: str = None):
         """Reads a C source file and get identifiers"""
         self.data = ""
         is_enum = False
         is_comment = False
         multiline = ""
 
+        self.read_exceptions(exceptions)
+
         with open(file_in, "r",
                   encoding="utf-8", errors="backslashreplace") as f:
             for line_no, line in enumerate(f):
@@ -240,20 +343,20 @@ class ParseDataStructs:
                 if is_enum:
                     match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
                     if match:
-                        self.store_type("symbol", match.group(1))
+                        self.store_type(line_no, "symbol", match.group(1))
                     if "}" in line:
                         is_enum = False
                     continue
 
                 match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
                 if match:
-                    self.store_type("ioctl", match.group(1),
+                    self.store_type(line_no, "ioctl", match.group(1),
                                     replace_underscores=False)
                     continue
 
                 match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
                 if match:
-                    self.store_type("define", match.group(1))
+                    self.store_type(line_no, "define", match.group(1))
                     continue
 
                 match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
@@ -261,90 +364,23 @@ class ParseDataStructs:
                 if match:
                     name = match.group(2).strip()
                     symbol = match.group(3)
-                    self.store_type("typedef", symbol, ref_name=name)
+                    self.store_type(line_no, "typedef", symbol, ref_name=name)
                     continue
 
                 for re_enum in self.RE_ENUMS:
                     match = re_enum.match(line)
                     if match:
-                        self.store_type("enum", match.group(1))
+                        self.store_type(line_no, "enum", match.group(1))
                         is_enum = True
                         break
 
                 for re_struct in self.RE_STRUCTS:
                     match = re_struct.match(line)
                     if match:
-                        self.store_type("struct", match.group(1))
+                        self.store_type(line_no, "struct", match.group(1))
                         break
 
-    def process_exceptions(self, fname: str):
-        """
-        Process exceptions file with rules to ignore or replace references.
-        """
-        if not fname:
-            return
-
-        name = os.path.basename(fname)
-
-        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
-            for ln, line in enumerate(f):
-                ln += 1
-                line = line.strip()
-                if not line or line.startswith("#"):
-                    continue
-
-                # Handle ignore rules
-                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
-                if match:
-                    c_type = match.group(1)
-                    symbol = match.group(2)
-
-                    if c_type not in self.DEF_SYMBOL_TYPES:
-                        sys.exit(f"{name}:{ln}: {c_type} is invalid")
-
-                    d = self.symbols[c_type]
-                    if symbol in d:
-                        del d[symbol]
-
-                    continue
-
-                # Handle replace rules
-                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
-                if not match:
-                    sys.exit(f"{name}:{ln}: invalid line: {line}")
-
-                c_type, old, new = match.groups()
-
-                if c_type not in self.DEF_SYMBOL_TYPES:
-                    sys.exit(f"{name}:{ln}: {c_type} is invalid")
-
-                reftype = None
-
-                # Parse reference type when the type is specified
-
-                match = re.match(r"^\:c\:(data|func|macro|type)\:\`(.+)\`", new)
-                if match:
-                    reftype = f":c:{match.group(1)}"
-                    new = match.group(2)
-                else:
-                    match = re.search(r"(\:ref)\:\`(.+)\`", new)
-                    if match:
-                        reftype = match.group(1)
-                        new = match.group(2)
-
-                # If the replacement rule doesn't have a type, get default
-                if not reftype:
-                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
-                    if not reftype:
-                        reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")
-
-                new_ref = f"{reftype}:`{old} <{new}>`"
-
-                # Change self.symbols to use the replacement rule
-                if old in self.symbols[c_type]:
-                    self.symbols[c_type][old] = new_ref
-                else:
-                    print(f"{name}:{ln}: Warning: can't find {old} {c_type}")
+        self.apply_exceptions()
 
     def debug_print(self):
         """
@@ -360,8 +396,8 @@ class ParseDataStructs:
 
             print(f"{c_type}:")
 
-            for symbol, ref in sorted(refs.items()):
-                print(f"  {symbol} -> {ref}")
+            for symbol, (ref, ln) in sorted(refs.items()):
+                print(f"  #{ln:<5d} {symbol} -> {ref}")
 
             print()
 
@@ -384,7 +420,7 @@ class ParseDataStructs:
 
         # Process all reference types
         for ref_dict in self.symbols.values():
-            for symbol, replacement in ref_dict.items():
+            for symbol, (replacement, _) in ref_dict.items():
                 symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
                 text = re.sub(fr'{start_delim}{symbol}{end_delim}',
                               fr'\1{replacement}\2', text)
@@ -397,16 +433,10 @@ class ParseDataStructs:
 
     def gen_toc(self):
         """
-        Create a TOC table pointing to each symbol from the header
+        Create a list of symbols to be part of a TOC contents table
         """
         text = []
 
-        # Add header
-        text.append(".. contents:: Table of Contents")
-        text.append("   :depth: 2")
-        text.append("   :local:")
-        text.append("")
-
         # Sort symbol types per description
         symbol_descriptions = []
         for k, v in self.DEF_SYMBOL_TYPES.items():
@@ -426,8 +456,8 @@ class ParseDataStructs:
             text.append("")
 
             # Sort symbols alphabetically
-            for symbol, ref in sorted(refs.items()):
-                text.append(f"* :{ref}:")
+            for symbol, (ref, ln) in sorted(refs.items()):
+                text.append(f"- LINENO_{ln}: {ref}")
 
             text.append("")  # Add empty line between categories
 
diff --git a/tools/lib/python/kdoc/python_version.py b/tools/lib/python/kdoc/python_version.py
new file mode 100644
index 000000000000..e83088013db2
--- /dev/null
+++ b/tools/lib/python/kdoc/python_version.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2017-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+
+"""
+Handle Python version check logic.
+
+Not all Python versions are supported by scripts. Yet, in some cases,
+like during documentation build, a newer version of python could be
+available.
+
+This class allows checking if the minimal requirements are followed.
+
+Better than that, PythonVersion.check_python() not only checks the minimal
+requirements, but it automatically switches to the newest available
+Python version if present.
+
+"""
+
+import os
+import re
+import subprocess
+import shlex
+import sys
+
+from glob import glob
+from textwrap import indent
+
+class PythonVersion:
+    """
+    Ancillary methods that parse and compare Python versions, locate newer
+    interpreters in PATH and re-run the current script with one of them.
+    """
+
+    def __init__(self, version):
+        """Initialize self.version tuple from a version string"""
+        self.version = self.parse_version(version)
+
+    @staticmethod
+    def parse_version(version):
+        """Convert a major.minor.patch version into a tuple"""
+        return tuple(int(x) for x in version.split("."))
+
+    @staticmethod
+    def ver_str(version):
+        """Returns a version tuple as major.minor.patch"""
+        return ".".join([str(x) for x in version])
+
+    @staticmethod
+    def cmd_print(cmd, max_len=80):
+        cmd_line = []
+
+        for w in cmd:
+            w = shlex.quote(w)
+
+            if cmd_line:
+                if not max_len or len(cmd_line[-1]) + len(w) < max_len:
+                    cmd_line[-1] += " " + w
+                    continue
+                else:
+                    cmd_line[-1] += " \\"
+                    cmd_line.append(w)
+            else:
+                cmd_line.append(w)
+
+        return "\n  ".join(cmd_line)
+
+    def __str__(self):
+        """Returns a version tuple as major.minor.patch from self.version"""
+        return self.ver_str(self.version)
+
+    @staticmethod
+    def get_python_version(cmd):
+        """
+        Get python version from a Python binary. As we need to detect whether
+        newer python binaries exist, we can't rely on sys.version_info here.
+        """
+
+        kwargs = {}
+        if sys.version_info < (3, 7):
+            kwargs['universal_newlines'] = True
+        else:
+            kwargs['text'] = True
+
+        result = subprocess.run([cmd, "--version"],
+                                stdout = subprocess.PIPE,
+                                stderr = subprocess.PIPE,
+                                **kwargs, check=False)
+
+        version = result.stdout.strip()
+
+        match = re.search(r"(\d+\.\d+\.\d+)", version)
+        if match:
+            return PythonVersion.parse_version(match.group(1))
+
+        print(f"Can't parse version {version}")
+        return (0, 0, 0)
+
+    @staticmethod
+    def find_python(min_version):
+        """
+        Detect if there are any python 3.xy binaries in PATH whose version is
+        at least min_version.
+
+        Note: this routine is limited to up to 2 digits for python3. We
+        may need to update it one day, hopefully in a distant future.
+        """
+        patterns = [
+            "python3.[0-9][0-9]",
+            "python3.[0-9]",
+        ]
+
+        python_cmd = []
+
+        # Seek for a python binary newer than min_version
+        for path in os.getenv("PATH", "").split(":"):
+            for pattern in patterns:
+                for cmd in glob(os.path.join(path, pattern)):
+                    if os.path.isfile(cmd) and os.access(cmd, os.X_OK):
+                        version = PythonVersion.get_python_version(cmd)
+                        if version >= min_version:
+                            python_cmd.append((version, cmd))
+
+        return sorted(python_cmd, reverse=True)
+
+    @staticmethod
+    def check_python(min_version, show_alternatives=False, bail_out=False,
+                     success_on_error=False):
+        """
+        Check if the current python binary satisfies our minimal requirement
+        for Sphinx build. If not, re-run with a newer version if found.
+        """
+        cur_ver = sys.version_info[:3]
+        if cur_ver >= min_version:
+            ver = PythonVersion.ver_str(cur_ver)
+            return
+
+        python_ver = PythonVersion.ver_str(cur_ver)
+
+        available_versions = PythonVersion.find_python(min_version)
+        if not available_versions:
+            print(f"ERROR: Python version {python_ver} is not supported anymore\n")
+            print("       Can't find a new version. This script may fail")
+            return
+
+        script_path = os.path.abspath(sys.argv[0])
+
+        # Check possible alternatives
+        if available_versions:
+            new_python_cmd = available_versions[0][1]
+        else:
+            new_python_cmd = None
+
+        if show_alternatives and available_versions:
+            print("You could run, instead:")
+            for _, cmd in available_versions:
+                args = [cmd, script_path] + sys.argv[1:]
+
+                cmd_str = indent(PythonVersion.cmd_print(args), "  ")
+                print(f"{cmd_str}\n")
+
+        if bail_out:
+            msg = f"Python {python_ver} not supported. Bailing out"
+            if success_on_error:
+                print(msg, file=sys.stderr)
+                sys.exit(0)
+            else:
+                sys.exit(msg)
+
+        print(f"Python {python_ver} not supported. Changing to {new_python_cmd}")
+
+        # Restart script using the newer version
+        args = [new_python_cmd, script_path] + sys.argv[1:]
+
+        try:
+            os.execv(new_python_cmd, args)
+        except OSError as e:
+            sys.exit(f"Failed to restart with {new_python_cmd}: {e}")
diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
index 211df5a93ad9..7736b492f559 100644
--- a/tools/net/ynl/Makefile
+++ b/tools/net/ynl/Makefile
@@ -12,10 +12,13 @@ endif
 libdir  ?= $(prefix)/$(libdir_relative)
 includedir ?= $(prefix)/include
 
-SUBDIRS = lib generated samples
+SPECDIR=../../../Documentation/netlink/specs
+
+SUBDIRS = lib generated samples ynltool tests
 
 all: $(SUBDIRS) libynl.a
 
+ynltool: | lib generated libynl.a
 samples: | lib generated
 libynl.a: | lib generated
 	@echo -e "\tAR $@"
@@ -48,5 +51,27 @@ install: libynl.a lib/*.h
 	@echo -e "\tINSTALL pyynl"
 	@pip install --prefix=$(DESTDIR)$(prefix) .
 	@make -C generated install
+	@make -C tests install
+
+run_tests:
+	@$(MAKE) -C tests run_tests
+
+lint:
+	yamllint $(SPECDIR)
+
+schema_check:
+	@N=1; \
+	for spec in $(SPECDIR)/*.yaml ; do \
+		NAME=$$(basename $$spec) ; \
+		OUTPUT=$$(./pyynl/cli.py --spec $$spec --validate) ; \
+		if [ $$? -eq 0 ] ; then \
+			echo "ok $$N $$NAME schema validation" ; \
+		else \
+			echo "not ok $$N $$NAME schema validation" ; \
+			echo "$$OUTPUT" ; \
+			echo ; \
+		fi ; \
+		N=$$((N+1)) ; \
+	done
 
-.PHONY: all clean distclean install $(SUBDIRS)
+.PHONY: all clean distclean install run_tests lint schema_check $(SUBDIRS)
diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h
index 29481989ea76..ced7dce44efb 100644
--- a/tools/net/ynl/lib/ynl-priv.h
+++ b/tools/net/ynl/lib/ynl-priv.h
@@ -313,7 +313,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str)
 	struct nlattr *attr;
 	size_t len;
 
-	len = strlen(str);
+	len = strlen(str) + 1;
 	if (__ynl_attr_put_overflow(nlh, len))
 		return;
 
@@ -321,7 +321,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str)
 	attr->nla_type = attr_type;
 
 	strcpy((char *)ynl_attr_data(attr), str);
-	attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len);
+	attr->nla_len = NLA_HDRLEN + len;
 
 	nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len);
 }
diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py
index 8c192e900bd3..af02a5b7e5a2 100755
--- a/tools/net/ynl/pyynl/cli.py
+++ b/tools/net/ynl/pyynl/cli.py
@@ -7,9 +7,10 @@ import os
 import pathlib
 import pprint
 import sys
+import textwrap
 
 sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
-from lib import YnlFamily, Netlink, NlError
+from lib import YnlFamily, Netlink, NlError, SpecFamily
 
 sys_schema_dir='/usr/share/ynl'
 relative_schema_dir='../../../../Documentation/netlink'
@@ -39,6 +40,60 @@ class YnlEncoder(json.JSONEncoder):
         return json.JSONEncoder.default(self, obj)
 
 
+def print_attr_list(ynl, attr_names, attr_set, indent=2):
+    """Print a list of attributes with their types and documentation."""
+    prefix = ' ' * indent
+    for attr_name in attr_names:
+        if attr_name in attr_set.attrs:
+            attr = attr_set.attrs[attr_name]
+            attr_info = f'{prefix}- {attr_name}: {attr.type}'
+            if 'enum' in attr.yaml:
+                enum_name = attr.yaml['enum']
+                attr_info += f" (enum: {enum_name})"
+                # Print enum values if available
+                if enum_name in ynl.consts:
+                    const = ynl.consts[enum_name]
+                    enum_values = list(const.entries.keys())
+                    attr_info += f"\n{prefix}  {const.type.capitalize()}: {', '.join(enum_values)}"
+
+            # Show nested attributes reference and recursively display them
+            nested_set_name = None
+            if attr.type == 'nest' and 'nested-attributes' in attr.yaml:
+                nested_set_name = attr.yaml['nested-attributes']
+                attr_info += f" -> {nested_set_name}"
+
+            if attr.yaml.get('doc'):
+                doc_text = textwrap.indent(attr.yaml['doc'], prefix + '  ')
+                attr_info += f"\n{doc_text}"
+            print(attr_info)
+
+            # Recursively show nested attributes
+            if nested_set_name in ynl.attr_sets:
+                nested_set = ynl.attr_sets[nested_set_name]
+                # Filter out 'unspec' and other unused attrs
+                nested_names = [n for n in nested_set.attrs.keys()
+                                if nested_set.attrs[n].type != 'unused']
+                if nested_names:
+                    print_attr_list(ynl, nested_names, nested_set, indent + 4)
+
+
+def print_mode_attrs(ynl, mode, mode_spec, attr_set, print_request=True):
+    """Print a given mode (do/dump/event/notify)."""
+    mode_title = mode.capitalize()
+
+    if print_request and 'request' in mode_spec and 'attributes' in mode_spec['request']:
+        print(f'\n{mode_title} request attributes:')
+        print_attr_list(ynl, mode_spec['request']['attributes'], attr_set)
+
+    if 'reply' in mode_spec and 'attributes' in mode_spec['reply']:
+        print(f'\n{mode_title} reply attributes:')
+        print_attr_list(ynl, mode_spec['reply']['attributes'], attr_set)
+
+    if 'attributes' in mode_spec:
+        print(f'\n{mode_title} attributes:')
+        print_attr_list(ynl, mode_spec['attributes'], attr_set)
+
+
 def main():
     description = """
     YNL CLI utility - a general purpose netlink utility that uses YAML
@@ -70,6 +125,9 @@ def main():
     group.add_argument('--dump', dest='dump', metavar='DUMP-OPERATION', type=str)
     group.add_argument('--list-ops', action='store_true')
     group.add_argument('--list-msgs', action='store_true')
+    group.add_argument('--list-attrs', dest='list_attrs', metavar='OPERATION', type=str,
+                       help='List attributes for an operation')
+    group.add_argument('--validate', action='store_true')
 
     parser.add_argument('--duration', dest='duration', type=int,
                         help='when subscribed, watch for DURATION seconds')
@@ -111,15 +169,25 @@ def main():
 
     if args.family:
         spec = f"{spec_dir()}/{args.family}.yaml"
-        if args.schema is None and spec.startswith(sys_schema_dir):
-            args.schema = '' # disable schema validation when installed
-        if args.process_unknown is None:
-            args.process_unknown = True
     else:
         spec = args.spec
     if not os.path.isfile(spec):
         raise Exception(f"Spec file {spec} does not exist")
 
+    if args.validate:
+        try:
+            SpecFamily(spec, args.schema)
+        except Exception as error:
+            print(error)
+            exit(1)
+        return
+
+    if args.family: # set behaviour when using installed specs
+        if args.schema is None and spec.startswith(sys_schema_dir):
+            args.schema = '' # disable schema validation when installed
+        if args.process_unknown is None:
+            args.process_unknown = True
+
     ynl = YnlFamily(spec, args.schema, args.process_unknown,
                     recv_size=args.dbg_small_recv)
     if args.dbg_small_recv:
@@ -135,6 +203,28 @@ def main():
         for op_name, op in ynl.msgs.items():
             print(op_name, " [", ", ".join(op.modes), "]")
 
+    if args.list_attrs:
+        op = ynl.msgs.get(args.list_attrs)
+        if not op:
+            print(f'Operation {args.list_attrs} not found')
+            exit(1)
+
+        print(f'Operation: {op.name}')
+        print(op.yaml['doc'])
+
+        for mode in ['do', 'dump', 'event']:
+            if mode in op.yaml:
+                print_mode_attrs(ynl, mode, op.yaml[mode], op.attr_set, True)
+
+        if 'notify' in op.yaml:
+            mode_spec = op.yaml['notify']
+            ref_spec = ynl.msgs.get(mode_spec).yaml.get('do')
+            if ref_spec:
+                print_mode_attrs(ynl, 'notify', ref_spec, op.attr_set, False)
+
+        if 'mcgrp' in op.yaml:
+            print(f"\nMulticast group: {op.yaml['mcgrp']}")
+
     try:
         if args.do:
             reply = ynl.do(args.do, attrs, args.flags)
diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py
index 9b523cbb3568..fd0f6b8d54d1 100755
--- a/tools/net/ynl/pyynl/ethtool.py
+++ b/tools/net/ynl/pyynl/ethtool.py
@@ -44,6 +44,9 @@ def print_field(reply, *desc):
     Pretty-print a set of fields from the reply. desc specifies the
     fields and the optional type (bool/yn).
     """
+    if not reply:
+        return
+
     if len(desc) == 0:
         return print_field(reply, *zip(reply.keys(), reply.keys()))
 
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index 62383c70ebb9..36d36eb7e3b8 100644
--- a/tools/net/ynl/pyynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -100,12 +100,21 @@ class Netlink:
                                   'bitfield32', 'sint', 'uint'])
 
 class NlError(Exception):
-  def __init__(self, nl_msg):
-    self.nl_msg = nl_msg
-    self.error = -nl_msg.error
-
-  def __str__(self):
-    return f"Netlink error: {os.strerror(self.error)}\n{self.nl_msg}"
+    def __init__(self, nl_msg):
+        self.nl_msg = nl_msg
+        self.error = -nl_msg.error
+
+    def __str__(self):
+        msg = "Netlink error: "
+
+        extack = self.nl_msg.extack.copy() if self.nl_msg.extack else {}
+        if 'msg' in extack:
+            msg += extack['msg'] + ': '
+            del extack['msg']
+        msg += os.strerror(self.error)
+        if extack:
+            msg += ' ' + str(extack)
+        return msg
 
 
 class ConfigError(Exception):
@@ -976,6 +985,15 @@ class YnlFamily(SpecFamily):
                 raw = bytes.fromhex(string)
             else:
                 raw = int(string, 16)
+        elif attr_spec.display_hint == 'mac':
+            # Parse MAC address in format "00:11:22:33:44:55" or "001122334455"
+            if ':' in string:
+                mac_bytes = [int(x, 16) for x in string.split(':')]
+            else:
+                if len(string) % 2 != 0:
+                    raise Exception(f"Invalid MAC address format: {string}")
+                mac_bytes = [int(string[i:i+2], 16) for i in range(0, len(string), 2)]
+            raw = bytes(mac_bytes)
         else:
             raise Exception(f"Display hint '{attr_spec.display_hint}' not implemented"
                             f" when parsing '{attr_spec['name']}'")
@@ -1039,15 +1057,15 @@ class YnlFamily(SpecFamily):
                     self.check_ntf()
 
     def operation_do_attributes(self, name):
-      """
-      For a given operation name, find and return a supported
-      set of attributes (as a dict).
-      """
-      op = self.find_operation(name)
-      if not op:
-        return None
-
-      return op['do']['request']['attributes'].copy()
+        """
+        For a given operation name, find and return a supported
+        set of attributes (as a dict).
+        """
+        op = self.find_operation(name)
+        if not op:
+            return None
+
+        return op['do']['request']['attributes'].copy()
 
     def _encode_message(self, op, vals, flags, req_seq):
         nl_flags = Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index 58086b101057..b517d0c605ad 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -861,6 +861,18 @@ class TypeIndexedArray(Type):
         return [f"{member} = {self.c_name};",
                 f"{presence} = n_{self.c_name};"]
 
+    def free_needs_iter(self):
+        return self.sub_type == 'nest'
+
+    def _free_lines(self, ri, var, ref):
+        lines = []
+        if self.sub_type == 'nest':
+            lines += [
+                f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)",
+                f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);',
+            ]
+        lines += f"free({var}->{ref}{self.c_name});",
+        return lines
 
 class TypeNestTypeValue(Type):
     def _complex_member_type(self, ri):
@@ -1193,7 +1205,7 @@ class SubMessage(SpecSubMessage):
 
 
 class Family(SpecFamily):
-    def __init__(self, file_name, exclude_ops):
+    def __init__(self, file_name, exclude_ops, fn_prefix):
         # Added by resolve:
         self.c_name = None
         delattr(self, "c_name")
@@ -1225,6 +1237,8 @@ class Family(SpecFamily):
         else:
             self.uapi_header_name = self.ident_name
 
+        self.fn_prefix = fn_prefix if fn_prefix else f'{self.ident_name}-nl'
+
     def resolve(self):
         self.resolve_up(super())
 
@@ -2899,12 +2913,12 @@ def print_kernel_op_table_fwd(family, cw, terminate):
             continue
 
         if 'do' in op:
-            name = c_lower(f"{family.ident_name}-nl-{op_name}-doit")
+            name = c_lower(f"{family.fn_prefix}-{op_name}-doit")
             cw.write_func_prot('int', name,
                                ['struct sk_buff *skb', 'struct genl_info *info'], suffix=';')
 
         if 'dump' in op:
-            name = c_lower(f"{family.ident_name}-nl-{op_name}-dumpit")
+            name = c_lower(f"{family.fn_prefix}-{op_name}-dumpit")
             cw.write_func_prot('int', name,
                                ['struct sk_buff *skb', 'struct netlink_callback *cb'], suffix=';')
     cw.nl()
@@ -2930,7 +2944,7 @@ def print_kernel_op_table(family, cw):
                                             for x in op['dont-validate']])), )
             for op_mode in ['do', 'dump']:
                 if op_mode in op:
-                    name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it")
+                    name = c_lower(f"{family.fn_prefix}-{op_name}-{op_mode}it")
                     members.append((op_mode + 'it', name))
             if family.kernel_policy == 'per-op':
                 struct = Struct(family, op['attribute-set'],
@@ -2968,7 +2982,7 @@ def print_kernel_op_table(family, cw):
                         members.append(('validate',
                                         ' | '.join([c_upper('genl-dont-validate-' + x)
                                                     for x in dont_validate])), )
-                name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it")
+                name = c_lower(f"{family.fn_prefix}-{op_name}-{op_mode}it")
                 if 'pre' in op[op_mode]:
                     members.append((cb_names[op_mode]['pre'], c_lower(op[op_mode]['pre'])))
                 members.append((op_mode + 'it', name))
@@ -3390,6 +3404,7 @@ def main():
                         help='Do not overwrite the output file if the new output is identical to the old')
     parser.add_argument('--exclude-op', action='append', default=[])
     parser.add_argument('-o', dest='out_file', type=str, default=None)
+    parser.add_argument('--function-prefix', dest='fn_prefix', type=str)
     args = parser.parse_args()
 
     if args.header is None:
@@ -3398,7 +3413,7 @@ def main():
     exclude_ops = [re.compile(expr) for expr in args.exclude_op]
 
     try:
-        parsed = Family(args.spec, exclude_ops)
+        parsed = Family(args.spec, exclude_ops, args.fn_prefix)
         if parsed.license != '((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)':
             print('Spec license:', parsed.license)
             print('License must be: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)')
@@ -3418,11 +3433,16 @@ def main():
     cw.p("/* Do not edit directly, auto-generated from: */")
     cw.p(f"/*\t{spec_kernel} */")
     cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */")
-    if args.exclude_op or args.user_header:
+    if args.exclude_op or args.user_header or args.fn_prefix:
         line = ''
-        line += ' --user-header '.join([''] + args.user_header)
-        line += ' --exclude-op '.join([''] + args.exclude_op)
+        if args.user_header:
+            line += ' --user-header '.join([''] + args.user_header)
+        if args.exclude_op:
+            line += ' --exclude-op '.join([''] + args.exclude_op)
+        if args.fn_prefix:
+            line += f' --function-prefix {args.fn_prefix}'
         cw.p(f'/* YNL-ARG{line} */')
+    cw.p('/* To regenerate run: tools/net/ynl/ynl-regen.sh */')
     cw.nl()
 
     if args.mode == 'uapi':
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
index 7f5fca7682d7..05087ee323ba 100644
--- a/tools/net/ynl/samples/.gitignore
+++ b/tools/net/ynl/samples/.gitignore
@@ -7,3 +7,4 @@ rt-addr
 rt-link
 rt-route
 tc
+tc-filter-add
diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile
index c9494a564da4..d76cbd41cbb1 100644
--- a/tools/net/ynl/samples/Makefile
+++ b/tools/net/ynl/samples/Makefile
@@ -19,6 +19,7 @@ include $(wildcard *.d)
 all: $(BINS)
 
 CFLAGS_page-pool=$(CFLAGS_netdev)
+CFLAGS_tc-filter-add:=$(CFLAGS_tc)
 
 $(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS)
 	@echo -e '\tCC sample $@'
diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c
deleted file mode 100644
index e5d521320fbf..000000000000
--- a/tools/net/ynl/samples/page-pool.c
+++ /dev/null
@@ -1,149 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <string.h>
-
-#include <ynl.h>
-
-#include <net/if.h>
-
-#include "netdev-user.h"
-
-struct stat {
-	unsigned int ifc;
-
-	struct {
-		unsigned int cnt;
-		size_t refs, bytes;
-	} live[2];
-
-	size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache;
-};
-
-struct stats_array {
-	unsigned int i, max;
-	struct stat *s;
-};
-
-static struct stat *find_ifc(struct stats_array *a, unsigned int ifindex)
-{
-	unsigned int i;
-
-	for (i = 0; i < a->i; i++) {
-		if (a->s[i].ifc == ifindex)
-			return &a->s[i];
-	}
-
-	a->i++;
-	if (a->i == a->max) {
-		a->max *= 2;
-		a->s = reallocarray(a->s, a->max, sizeof(*a->s));
-	}
-	a->s[i].ifc = ifindex;
-	return &a->s[i];
-}
-
-static void count(struct stat *s, unsigned int l,
-		  struct netdev_page_pool_get_rsp *pp)
-{
-	s->live[l].cnt++;
-	if (pp->_present.inflight)
-		s->live[l].refs += pp->inflight;
-	if (pp->_present.inflight_mem)
-		s->live[l].bytes += pp->inflight_mem;
-}
-
-int main(int argc, char **argv)
-{
-	struct netdev_page_pool_stats_get_list *pp_stats;
-	struct netdev_page_pool_get_list *pools;
-	struct stats_array a = {};
-	struct ynl_error yerr;
-	struct ynl_sock *ys;
-
-	ys = ynl_sock_create(&ynl_netdev_family, &yerr);
-	if (!ys) {
-		fprintf(stderr, "YNL: %s\n", yerr.msg);
-		return 1;
-	}
-
-	a.max = 128;
-	a.s = calloc(a.max, sizeof(*a.s));
-	if (!a.s)
-		goto err_close;
-
-	pools = netdev_page_pool_get_dump(ys);
-	if (!pools)
-		goto err_free;
-
-	ynl_dump_foreach(pools, pp) {
-		struct stat *s = find_ifc(&a, pp->ifindex);
-
-		count(s, 1, pp);
-		if (pp->_present.detach_time)
-			count(s, 0, pp);
-	}
-	netdev_page_pool_get_list_free(pools);
-
-	pp_stats = netdev_page_pool_stats_get_dump(ys);
-	if (!pp_stats)
-		goto err_free;
-
-	ynl_dump_foreach(pp_stats, pp) {
-		struct stat *s = find_ifc(&a, pp->info.ifindex);
-
-		if (pp->_present.alloc_fast)
-			s->alloc_fast += pp->alloc_fast;
-		if (pp->_present.alloc_refill)
-			s->alloc_fast += pp->alloc_refill;
-		if (pp->_present.alloc_slow)
-			s->alloc_slow += pp->alloc_slow;
-		if (pp->_present.recycle_ring)
-			s->recycle_ring += pp->recycle_ring;
-		if (pp->_present.recycle_cached)
-			s->recycle_cache += pp->recycle_cached;
-	}
-	netdev_page_pool_stats_get_list_free(pp_stats);
-
-	for (unsigned int i = 0; i < a.i; i++) {
-		char ifname[IF_NAMESIZE];
-		struct stat *s = &a.s[i];
-		const char *name;
-		double recycle;
-
-		if (!s->ifc) {
-			name = "<orphan>\t";
-		} else {
-			name = if_indextoname(s->ifc, ifname);
-			if (name)
-				printf("%8s", name);
-			printf("[%u]\t", s->ifc);
-		}
-
-		printf("page pools: %u (zombies: %u)\n",
-		       s->live[1].cnt, s->live[0].cnt);
-		printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n",
-		       s->live[1].refs, s->live[1].bytes,
-		       s->live[0].refs, s->live[0].bytes);
-
-		/* We don't know how many pages are sitting in cache and ring
-		 * so we will under-count the recycling rate a bit.
-		 */
-		recycle = (double)(s->recycle_ring + s->recycle_cache) /
-			(s->alloc_fast + s->alloc_slow) * 100;
-		printf("\t\trecycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)\n",
-		       recycle, s->alloc_slow, s->alloc_fast,
-		       s->recycle_ring, s->recycle_cache);
-	}
-
-	ynl_sock_destroy(ys);
-	return 0;
-
-err_free:
-	free(a.s);
-err_close:
-	fprintf(stderr, "YNL: %s\n", ys->err.msg);
-	ynl_sock_destroy(ys);
-	return 2;
-}
diff --git a/tools/net/ynl/samples/tc-filter-add.c b/tools/net/ynl/samples/tc-filter-add.c
new file mode 100644
index 000000000000..97871e9e9edc
--- /dev/null
+++ b/tools/net/ynl/samples/tc-filter-add.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <arpa/inet.h>
+#include <linux/pkt_sched.h>
+#include <linux/tc_act/tc_vlan.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/if_ether.h>
+#include <net/if.h>
+
+#include <ynl.h>
+
+#include "tc-user.h"
+
+#define TC_HANDLE (0xFFFFU << 16)	/* unsigned: 0xFFFF << 16 overflows a 32-bit int */
+
+const char *vlan_act_name(struct tc_vlan *p)
+{
+	switch (p->v_action) {
+	case TCA_VLAN_ACT_POP:
+		return "pop";
+	case TCA_VLAN_ACT_PUSH:
+		return "push";
+	case TCA_VLAN_ACT_MODIFY:
+		return "modify";
+	default:
+		break;
+	}
+
+	return "not supported";
+}
+
+const char *gact_act_name(struct tc_gact *p)
+{
+	switch (p->action) {
+	case TC_ACT_SHOT:
+		return "drop";
+	case TC_ACT_OK:
+		return "ok";
+	case TC_ACT_PIPE:
+		return "pipe";
+	default:
+		break;
+	}
+
+	return "not supported";
+}
+
+static void print_vlan(struct tc_act_vlan_attrs *vlan)
+{
+	printf("%s ", vlan_act_name(vlan->parms));
+	if (vlan->_present.push_vlan_id)
+		printf("id %u ", vlan->push_vlan_id);
+	if (vlan->_present.push_vlan_protocol)
+		printf("protocol %#x ", ntohs(vlan->push_vlan_protocol));
+	if (vlan->_present.push_vlan_priority)
+		printf("priority %u ", vlan->push_vlan_priority);
+}
+
+static void print_gact(struct tc_act_gact_attrs *gact)
+{
+	struct tc_gact *p = gact->parms;
+
+	printf("%s ", gact_act_name(p));
+}
+
+/* Print the flower keys present in @flower, then each action with its 1-based order. */
+static void flower_print(struct tc_flower_attrs *flower, const char *kind)
+{
+	unsigned int i;
+
+	printf("%s:\n", kind);
+	if (flower->_present.key_vlan_id)
+		printf("  vlan_id: %u\n", flower->key_vlan_id);
+	if (flower->_present.key_vlan_prio)
+		printf("  vlan_prio: %u\n", flower->key_vlan_prio);
+	if (flower->_present.key_num_of_vlans)
+		printf("  num_of_vlans: %u\n", flower->key_num_of_vlans);
+
+	for (i = 0; i < flower->_count.act; i++) {
+		struct tc_act_attrs *a = &flower->act[i];
+
+		printf("action order: %u %s ", i + 1, a->kind);
+		if (a->options._present.vlan)
+			print_vlan(&a->options.vlan);
+		else if (a->options._present.gact)
+			print_gact(&a->options.gact);
+		printf("\n");
+	}
+	printf("\n");
+}
+
+static void tc_filter_print(struct tc_gettfilter_rsp *f)
+{
+	struct tc_options_msg *opt = &f->options;
+
+	if (opt->_present.flower)
+		flower_print(&opt->flower, f->kind);
+	else if (f->_len.kind)
+		printf("%s pref %u proto: %#x\n", f->kind,
+		       (f->_hdr.tcm_info >> 16),
+			ntohs(TC_H_MIN(f->_hdr.tcm_info)));
+}
+
+static int tc_filter_add(struct ynl_sock *ys, int ifi)
+{
+	struct tc_newtfilter_req *req;
+	struct tc_act_attrs *acts;
+	struct tc_vlan p = {
+		.action = TC_ACT_PIPE,
+		.v_action = TCA_VLAN_ACT_PUSH
+	};
+	__u16 flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
+	int ret;
+
+	req = tc_newtfilter_req_alloc();
+	if (!req) {
+		fprintf(stderr, "tc_newtfilter_req_alloc failed\n");
+		return -1;
+	}
+	memset(req, 0, sizeof(*req));
+
+	acts = tc_act_attrs_alloc(3);
+	if (!acts) {
+		fprintf(stderr, "tc_act_attrs_alloc\n");
+		tc_newtfilter_req_free(req);
+		return -1;
+	}
+	memset(acts, 0, sizeof(*acts) * 3);
+
+	req->_hdr.tcm_ifindex = ifi;
+	req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+	req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q));
+	req->chain = 0;
+
+	tc_newtfilter_req_set_nlflags(req, flags);
+	tc_newtfilter_req_set_kind(req, "flower");
+	tc_newtfilter_req_set_options_flower_key_vlan_id(req, 100);
+	tc_newtfilter_req_set_options_flower_key_vlan_prio(req, 5);
+	tc_newtfilter_req_set_options_flower_key_num_of_vlans(req, 3);
+
+	__tc_newtfilter_req_set_options_flower_act(req, acts, 3);
+
+	/* Skip action at index 0 because in TC, the action array
+	 * index starts at 1, with each index defining the action's
+	 * order. In contrast, in YNL indexed arrays start at index 0.
+	 */
+	tc_act_attrs_set_kind(&acts[1], "vlan");
+	tc_act_attrs_set_options_vlan_parms(&acts[1], &p, sizeof(p));
+	tc_act_attrs_set_options_vlan_push_vlan_id(&acts[1], 200);
+	tc_act_attrs_set_kind(&acts[2], "vlan");
+	tc_act_attrs_set_options_vlan_parms(&acts[2], &p, sizeof(p));
+	tc_act_attrs_set_options_vlan_push_vlan_id(&acts[2], 300);
+
+	tc_newtfilter_req_set_options_flower_flags(req, 0);
+	tc_newtfilter_req_set_options_flower_key_eth_type(req, htons(0x8100));
+
+	ret = tc_newtfilter(ys, req);
+	if (ret)
+		fprintf(stderr, "tc_newtfilter: %s\n", ys->err.msg);
+
+	tc_newtfilter_req_free(req);
+
+	return ret;
+}
+
+static int tc_filter_show(struct ynl_sock *ys, int ifi)
+{
+	struct tc_gettfilter_req_dump *req;
+	struct tc_gettfilter_list *rsp;
+
+	req = tc_gettfilter_req_dump_alloc();
+	if (!req) {
+		fprintf(stderr, "tc_gettfilter_req_dump_alloc failed\n");
+		return -1;
+	}
+	memset(req, 0, sizeof(*req));
+
+	req->_hdr.tcm_ifindex = ifi;
+	req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+	req->_present.chain = 1;
+	req->chain = 0;
+
+	rsp = tc_gettfilter_dump(ys, req);
+	tc_gettfilter_req_dump_free(req);
+	if (!rsp) {
+		fprintf(stderr, "YNL: %s\n", ys->err.msg);
+		return -1;
+	}
+
+	if (ynl_dump_empty(rsp))
+		fprintf(stderr, "Error: no filters reported\n");
+	else
+		ynl_dump_foreach(rsp, flt) tc_filter_print(flt);
+
+	tc_gettfilter_list_free(rsp);
+
+	return 0;
+}
+
+static int tc_filter_del(struct ynl_sock *ys, int ifi)
+{
+	struct tc_deltfilter_req *req;
+	__u16 flags = NLM_F_REQUEST;
+	int ret;
+
+	req = tc_deltfilter_req_alloc();
+	if (!req) {
+		fprintf(stderr, "tc_deltfilter_req_alloc failed\n");
+		return -1;
+	}
+	memset(req, 0, sizeof(*req));
+
+	req->_hdr.tcm_ifindex = ifi;
+	req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+	req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q));
+	tc_deltfilter_req_set_nlflags(req, flags);
+
+	ret = tc_deltfilter(ys, req);
+	if (ret)
+		fprintf(stderr, "tc_deltfilter failed: %s\n", ys->err.msg);
+
+	tc_deltfilter_req_free(req);
+
+	return ret;
+}
+
+static int tc_clsact_add(struct ynl_sock *ys, int ifi)
+{
+	struct tc_newqdisc_req *req;
+	__u16 flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
+	int ret;
+
+	req = tc_newqdisc_req_alloc();
+	if (!req) {
+		fprintf(stderr, "tc_newqdisc_req_alloc failed\n");
+		return -1;
+	}
+	memset(req, 0, sizeof(*req));
+
+	req->_hdr.tcm_ifindex = ifi;
+	req->_hdr.tcm_parent = TC_H_CLSACT;
+	req->_hdr.tcm_handle = TC_HANDLE;
+	tc_newqdisc_req_set_nlflags(req, flags);
+	tc_newqdisc_req_set_kind(req, "clsact");
+
+	ret = tc_newqdisc(ys, req);
+	if (ret)
+		fprintf(stderr, "tc_newqdisc failed: %s\n", ys->err.msg);
+
+	tc_newqdisc_req_free(req);
+
+	return ret;
+}
+
+static int tc_clsact_del(struct ynl_sock *ys, int ifi)
+{
+	struct tc_delqdisc_req *req;
+	__u16 flags = NLM_F_REQUEST;
+	int ret;
+
+	req = tc_delqdisc_req_alloc();
+	if (!req) {
+		fprintf(stderr, "tc_delqdisc_req_alloc failed\n");
+		return -1;
+	}
+	memset(req, 0, sizeof(*req));
+
+	req->_hdr.tcm_ifindex = ifi;
+	req->_hdr.tcm_parent = TC_H_CLSACT;
+	req->_hdr.tcm_handle = TC_HANDLE;
+	tc_delqdisc_req_set_nlflags(req, flags);
+
+	ret = tc_delqdisc(ys, req);
+	if (ret)
+		fprintf(stderr, "tc_delqdisc failed: %s\n", ys->err.msg);
+
+	tc_delqdisc_req_free(req);
+
+	return ret;
+}
+
+static int tc_filter_config(struct ynl_sock *ys, int ifi)
+{
+	int ret = 0;
+
+	if (tc_filter_add(ys, ifi))
+		return -1;
+
+	ret = tc_filter_show(ys, ifi);
+
+	if (tc_filter_del(ys, ifi))
+		return -1;
+
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int ifi, ret = 0;
+
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s <interface_name>\n", argv[0]);
+		return 1;
+	}
+	ifi = if_nametoindex(argv[1]);
+	if (!ifi) {
+		perror("if_nametoindex");
+		return 1;
+	}
+
+	ys = ynl_sock_create(&ynl_tc_family, &yerr);
+	if (!ys) {
+		fprintf(stderr, "YNL: %s\n", yerr.msg);
+		return 1;
+	}
+
+	if (tc_clsact_add(ys, ifi)) {
+		ret = 2;
+		goto err_destroy;
+	}
+
+	if (tc_filter_config(ys, ifi))
+		ret = 3;
+
+	if (tc_clsact_del(ys, ifi))
+		ret = 4;
+
+err_destroy:
+	ynl_sock_destroy(ys);
+	return ret;
+}
diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile
new file mode 100644
index 000000000000..c1df2e001255
--- /dev/null
+++ b/tools/net/ynl/tests/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for YNL tests
+
+TESTS := \
+	test_ynl_cli.sh \
+	test_ynl_ethtool.sh \
+# end of TESTS
+
+all: $(TESTS)
+
+run_tests:
+	@for test in $(TESTS); do \
+		./$$test; \
+	done
+
+install: $(TESTS)
+	@mkdir -p $(DESTDIR)/usr/bin
+	@mkdir -p $(DESTDIR)/usr/share/kselftest
+	@cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(DESTDIR)/usr/share/kselftest/
+	@for test in $(TESTS); do \
+		name=$$(basename $$test .sh); \
+		sed -e 's|^ynl=.*|ynl="ynl"|' \
+		    -e 's|^ynl_ethtool=.*|ynl_ethtool="ynl-ethtool"|' \
+		    -e 's|KSELFTEST_KTAP_HELPERS=.*|KSELFTEST_KTAP_HELPERS="/usr/share/kselftest/ktap_helpers.sh"|' \
+		    $$test > $(DESTDIR)/usr/bin/$$name; \
+		chmod +x $(DESTDIR)/usr/bin/$$name; \
+	done
+
+clean distclean:
+	@# Nothing to clean
+
+.PHONY: all install clean distclean run_tests
diff --git a/tools/net/ynl/tests/config b/tools/net/ynl/tests/config
new file mode 100644
index 000000000000..339f1309c03f
--- /dev/null
+++ b/tools/net/ynl/tests/config
@@ -0,0 +1,6 @@
+CONFIG_DUMMY=m
+CONFIG_INET_DIAG=y
+CONFIG_IPV6=y
+CONFIG_NET_NS=y
+CONFIG_NETDEVSIM=m
+CONFIG_VETH=m
diff --git a/tools/net/ynl/tests/test_ynl_cli.sh b/tools/net/ynl/tests/test_ynl_cli.sh
new file mode 100755
index 000000000000..7c0722a08117
--- /dev/null
+++ b/tools/net/ynl/tests/test_ynl_cli.sh
@@ -0,0 +1,327 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test YNL CLI functionality
+
+# Load KTAP test helpers
+KSELFTEST_KTAP_HELPERS="$(dirname "$(realpath "$0")")/../../../testing/selftests/kselftest/ktap_helpers.sh"
+# shellcheck source=../../../testing/selftests/kselftest/ktap_helpers.sh
+source "$KSELFTEST_KTAP_HELPERS"
+
+# Default ynl path for direct execution, can be overridden by make install
+ynl="../pyynl/cli.py"
+
+readonly NSIM_ID="1338"
+readonly NSIM_DEV_NAME="nsim${NSIM_ID}"
+readonly VETH_A="veth_a"
+readonly VETH_B="veth_b"
+
+testns="ynl-$(mktemp -u XXXXXX)"
+TESTS_NO=0
+
+# Test listing available families
+cli_list_families()
+{
+	if $ynl --list-families &>/dev/null; then
+		ktap_test_pass "YNL CLI list families"
+	else
+		ktap_test_fail "YNL CLI list families"
+	fi
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test netdev family operations (dev-get, queue-get)
+cli_netdev_ops()
+{
+	local dev_output
+	local ifindex
+
+	ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+	dev_output=$(ip netns exec "$testns" $ynl --family netdev \
+		--do dev-get --json "{\"ifindex\": $ifindex}" 2>/dev/null)
+
+	if ! echo "$dev_output" | grep -q "ifindex"; then
+		ktap_test_fail "YNL CLI netdev operations (netdev dev-get output missing ifindex)"
+		return
+	fi
+
+	if ! ip netns exec "$testns" $ynl --family netdev \
+		--dump queue-get --json "{\"ifindex\": $ifindex}" &>/dev/null; then
+		ktap_test_fail "YNL CLI netdev operations (failed to get netdev queue info)"
+		return
+	fi
+
+	ktap_test_pass "YNL CLI netdev operations"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test ethtool family operations (rings-get, linkinfo-get)
+cli_ethtool_ops()
+{
+	local rings_output
+	local linkinfo_output
+
+	rings_output=$(ip netns exec "$testns" $ynl --family ethtool \
+		--do rings-get --json "{\"header\": {\"dev-name\": \"$NSIM_DEV_NAME\"}}" 2>/dev/null)
+
+	if ! echo "$rings_output" | grep -q "header"; then
+		ktap_test_fail "YNL CLI ethtool operations (ethtool rings-get output missing header)"
+		return
+	fi
+
+	linkinfo_output=$(ip netns exec "$testns" $ynl --family ethtool \
+		--do linkinfo-get --json "{\"header\": {\"dev-name\": \"$VETH_A\"}}" 2>/dev/null)
+
+	if ! echo "$linkinfo_output" | grep -q "header"; then
+		ktap_test_fail "YNL CLI ethtool operations (ethtool linkinfo-get output missing header)"
+		return
+	fi
+
+	ktap_test_pass "YNL CLI ethtool operations"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-route family operations
+cli_rt_route_ops()
+{
+	local ifindex
+
+	if ! $ynl --list-families 2>/dev/null | grep -q "rt-route"; then
+		ktap_test_skip "YNL CLI rt-route operations (rt-route family not available)"
+		return
+	fi
+
+	ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+	# Add route: 192.0.2.0/24 dev $dev scope link
+	if ! ip netns exec "$testns" $ynl --family rt-route --do newroute --create \
+		--json "{\"dst\": \"192.0.2.0\", \"oif\": $ifindex, \"rtm-dst-len\": 24, \"rtm-family\": 2, \"rtm-scope\": 253, \"rtm-type\": 1, \"rtm-protocol\": 3, \"rtm-table\": 254}" &>/dev/null; then
+		ktap_test_fail "YNL CLI rt-route operations (failed to add route)"
+		return
+	fi
+
+	local route_output
+	route_output=$(ip netns exec "$testns" $ynl --family rt-route --dump getroute 2>/dev/null)
+	if echo "$route_output" | grep -q "192.0.2.0"; then
+		ktap_test_pass "YNL CLI rt-route operations"
+	else
+		ktap_test_fail "YNL CLI rt-route operations (failed to verify route)"
+	fi
+
+	ip netns exec "$testns" $ynl --family rt-route --do delroute \
+		--json "{\"dst\": \"192.0.2.0\", \"oif\": $ifindex, \"rtm-dst-len\": 24, \"rtm-family\": 2, \"rtm-scope\": 253, \"rtm-type\": 1, \"rtm-protocol\": 3, \"rtm-table\": 254}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-addr family operations
+cli_rt_addr_ops()
+{
+	local ifindex
+
+	if ! $ynl --list-families 2>/dev/null | grep -q "rt-addr"; then
+		ktap_test_skip "YNL CLI rt-addr operations (rt-addr family not available)"
+		return
+	fi
+
+	ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+	if ! ip netns exec "$testns" $ynl --family rt-addr --do newaddr \
+		--json "{\"ifa-index\": $ifindex, \"local\": \"192.0.2.100\", \"ifa-prefixlen\": 24, \"ifa-family\": 2}" &>/dev/null; then
+		ktap_test_fail "YNL CLI rt-addr operations (failed to add address)"
+		return
+	fi
+
+	local addr_output
+	addr_output=$(ip netns exec "$testns" $ynl --family rt-addr --dump getaddr 2>/dev/null)
+	if echo "$addr_output" | grep -q "192.0.2.100"; then
+		ktap_test_pass "YNL CLI rt-addr operations"
+	else
+		ktap_test_fail "YNL CLI rt-addr operations (failed to verify address)"
+	fi
+
+	ip netns exec "$testns" $ynl --family rt-addr --do deladdr \
+		--json "{\"ifa-index\": $ifindex, \"local\": \"192.0.2.100\", \"ifa-prefixlen\": 24, \"ifa-family\": 2}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-link family operations
+cli_rt_link_ops()
+{
+	if ! $ynl --list-families 2>/dev/null | grep -q "rt-link"; then
+		ktap_test_skip "YNL CLI rt-link operations (rt-link family not available)"
+		return
+	fi
+
+	if ! ip netns exec "$testns" $ynl --family rt-link --do newlink --create \
+		--json "{\"ifname\": \"dummy0\", \"linkinfo\": {\"kind\": \"dummy\"}}" &>/dev/null; then
+		ktap_test_fail "YNL CLI rt-link operations (failed to add link)"
+		return
+	fi
+
+	local link_output
+	link_output=$(ip netns exec "$testns" $ynl --family rt-link --dump getlink 2>/dev/null)
+	if echo "$link_output" | grep -q "$NSIM_DEV_NAME" && echo "$link_output" | grep -q "dummy0"; then
+		ktap_test_pass "YNL CLI rt-link operations"
+	else
+		ktap_test_fail "YNL CLI rt-link operations (failed to verify link)"
+	fi
+
+	ip netns exec "$testns" $ynl --family rt-link --do dellink \
+		--json "{\"ifname\": \"dummy0\"}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-neigh family operations
+cli_rt_neigh_ops()
+{
+	local ifindex neigh_output
+
+	if ! $ynl --list-families 2>/dev/null | grep -q "rt-neigh"; then
+		ktap_test_skip "YNL CLI rt-neigh operations (rt-neigh family not available)"
+		return
+	fi
+
+	ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+	# Add neighbor: 192.0.2.1 dev nsim1338 lladdr 11:22:33:44:55:66 PERMANENT
+	if ! ip netns exec "$testns" $ynl --family rt-neigh --do newneigh --create \
+		--json "{\"ndm-ifindex\": $ifindex, \"dst\": \"192.0.2.1\", \"lladdr\": \"11:22:33:44:55:66\", \"ndm-family\": 2, \"ndm-state\": 128}" &>/dev/null; then
+		ktap_test_fail "YNL CLI rt-neigh operations (failed to add neighbor)"
+		return
+	fi
+
+	neigh_output=$(ip netns exec "$testns" $ynl --family rt-neigh --dump getneigh 2>/dev/null)
+	if echo "$neigh_output" | grep -q "192.0.2.1"; then
+		ktap_test_pass "YNL CLI rt-neigh operations"
+	else
+		ktap_test_fail "YNL CLI rt-neigh operations (failed to verify neighbor)"
+	fi
+
+	ip netns exec "$testns" $ynl --family rt-neigh --do delneigh \
+		--json "{\"ndm-ifindex\": $ifindex, \"dst\": \"192.0.2.1\", \"lladdr\": \"11:22:33:44:55:66\", \"ndm-family\": 2}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-rule family operations
+cli_rt_rule_ops()
+{
+	if ! $ynl --list-families 2>/dev/null | grep -q "rt-rule"; then
+		ktap_test_skip "YNL CLI rt-rule operations (rt-rule family not available)"
+		return
+	fi
+
+	# Add rule: from 192.0.2.0/24 lookup 100 none
+	if ! ip netns exec "$testns" $ynl --family rt-rule --do newrule \
+		--json "{\"family\": 2, \"src-len\": 24, \"src\": \"192.0.2.0\", \"table\": 100}" &>/dev/null; then
+		ktap_test_fail "YNL CLI rt-rule operations (failed to add rule)"
+		return
+	fi
+
+	local rule_output
+	rule_output=$(ip netns exec "$testns" $ynl --family rt-rule --dump getrule 2>/dev/null)
+	if echo "$rule_output" | grep -q "192.0.2.0"; then
+		ktap_test_pass "YNL CLI rt-rule operations"
+	else
+		ktap_test_fail "YNL CLI rt-rule operations (failed to verify rule)"
+	fi
+
+	ip netns exec "$testns" $ynl --family rt-rule --do delrule \
+		--json "{\"family\": 2, \"src-len\": 24, \"src\": \"192.0.2.0\", \"table\": 100}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test nlctrl family operations
+cli_nlctrl_ops()
+{
+	local family_output
+
+	if ! family_output=$($ynl --family nlctrl \
+		--do getfamily --json "{\"family-name\": \"netdev\"}" 2>/dev/null); then
+		ktap_test_fail "YNL CLI nlctrl getfamily (failed to get nlctrl family info)"
+		return
+	fi
+
+	if ! echo "$family_output" | grep -q "family-name"; then
+		ktap_test_fail "YNL CLI nlctrl getfamily (nlctrl getfamily output missing family-name)"
+		return
+	fi
+
+	if ! echo "$family_output" | grep -q "family-id"; then
+		ktap_test_fail "YNL CLI nlctrl getfamily (nlctrl getfamily output missing family-id)"
+		return
+	fi
+
+	ktap_test_pass "YNL CLI nlctrl getfamily"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+setup()
+{
+	modprobe netdevsim &> /dev/null
+	if ! [ -f /sys/bus/netdevsim/new_device ]; then
+		ktap_skip_all "netdevsim module not available"
+		exit "$KSFT_SKIP"
+	fi
+
+	if ! ip netns add "$testns" 2>/dev/null; then
+		ktap_skip_all "failed to create test namespace"
+		exit "$KSFT_SKIP"
+	fi
+
+	echo "$NSIM_ID 1" | ip netns exec "$testns" tee /sys/bus/netdevsim/new_device >/dev/null 2>&1 || {
+		ktap_skip_all "failed to create netdevsim device"
+		exit "$KSFT_SKIP"
+	}
+
+	local dev
+	dev=$(ip netns exec "$testns" ls /sys/bus/netdevsim/devices/netdevsim$NSIM_ID/net 2>/dev/null | head -1)
+	if [[ -z "$dev" ]]; then
+		ktap_skip_all "failed to find netdevsim device"
+		exit "$KSFT_SKIP"
+	fi
+
+	ip -netns "$testns" link set dev "$dev" name "$NSIM_DEV_NAME" 2>/dev/null || {
+		ktap_skip_all "failed to rename netdevsim device"
+		exit "$KSFT_SKIP"
+	}
+
+	ip -netns "$testns" link set dev "$NSIM_DEV_NAME" up 2>/dev/null
+
+	if ! ip -n "$testns" link add "$VETH_A" type veth peer name "$VETH_B" 2>/dev/null; then
+		ktap_skip_all "failed to create veth pair"
+		exit "$KSFT_SKIP"
+	fi
+
+	ip -n "$testns" link set "$VETH_A" up 2>/dev/null
+	ip -n "$testns" link set "$VETH_B" up 2>/dev/null
+}
+
+cleanup()
+{
+	ip netns exec "$testns" bash -c "echo $NSIM_ID > /sys/bus/netdevsim/del_device" 2>/dev/null || true
+	ip netns del "$testns" 2>/dev/null || true
+}
+
+# Check if ynl command is available
+if ! command -v $ynl &>/dev/null && [[ ! -x $ynl ]]; then
+	ktap_skip_all "ynl command not found: $ynl"
+	exit "$KSFT_SKIP"
+fi
+
+trap cleanup EXIT
+
+ktap_print_header
+setup
+ktap_set_plan "${TESTS_NO}"
+
+cli_list_families
+cli_netdev_ops
+cli_ethtool_ops
+cli_rt_route_ops
+cli_rt_addr_ops
+cli_rt_link_ops
+cli_rt_neigh_ops
+cli_rt_rule_ops
+cli_nlctrl_ops
+
+ktap_finished
diff --git a/tools/net/ynl/tests/test_ynl_ethtool.sh b/tools/net/ynl/tests/test_ynl_ethtool.sh
new file mode 100755
index 000000000000..b826269017f4
--- /dev/null
+++ b/tools/net/ynl/tests/test_ynl_ethtool.sh
@@ -0,0 +1,222 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test YNL ethtool functionality
+
+# Load KTAP test helpers
+KSELFTEST_KTAP_HELPERS="$(dirname "$(realpath "$0")")/../../../testing/selftests/kselftest/ktap_helpers.sh"
+# shellcheck source=../../../testing/selftests/kselftest/ktap_helpers.sh
+source "$KSELFTEST_KTAP_HELPERS"
+
+# Default ynl-ethtool path for direct execution, can be overridden by make install
+ynl_ethtool="../pyynl/ethtool.py"
+
+readonly NSIM_ID="1337"
+readonly NSIM_DEV_NAME="nsim${NSIM_ID}"
+readonly VETH_A="veth_a"
+readonly VETH_B="veth_b"
+
+testns="ynl-ethtool-$(mktemp -u XXXXXX)"
+TESTS_NO=0
+
+# Uses veth device as netdevsim doesn't support basic ethtool device info
+ethtool_device_info()
+{
+	local info_output
+
+	info_output=$(ip netns exec "$testns" $ynl_ethtool "$VETH_A" 2>/dev/null)
+
+	if ! echo "$info_output" | grep -q "Settings for"; then
+		ktap_test_fail "YNL ethtool device info (device info output missing expected content)"
+		return
+	fi
+
+	ktap_test_pass "YNL ethtool device info"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_statistics()
+{
+	local stats_output
+
+	stats_output=$(ip netns exec "$testns" $ynl_ethtool --statistics "$NSIM_DEV_NAME" 2>/dev/null)
+
+	if ! echo "$stats_output" | grep -q -E "(NIC statistics|packets|bytes)"; then
+		ktap_test_fail "YNL ethtool statistics (statistics output missing expected content)"
+		return
+	fi
+
+	ktap_test_pass "YNL ethtool statistics"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_ring_params()
+{
+	local ring_output
+
+	ring_output=$(ip netns exec "$testns" $ynl_ethtool --show-ring "$NSIM_DEV_NAME" 2>/dev/null)
+
+	if ! echo "$ring_output" | grep -q -E "(Ring parameters|RX|TX)"; then
+		ktap_test_fail "YNL ethtool ring parameters (ring parameters output missing expected content)"
+		return
+	fi
+
+	if ! ip netns exec "$testns" $ynl_ethtool --set-ring "$NSIM_DEV_NAME" rx 64 2>/dev/null; then
+		ktap_test_fail "YNL ethtool ring parameters (set-ring command failed unexpectedly)"
+		return
+	fi
+
+	ktap_test_pass "YNL ethtool ring parameters (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_coalesce_params()
+{
+	if ! ip netns exec "$testns" $ynl_ethtool --show-coalesce "$NSIM_DEV_NAME" &>/dev/null; then
+		ktap_test_fail "YNL ethtool coalesce parameters (failed to get coalesce parameters)"
+		return
+	fi
+
+	if ! ip netns exec "$testns" $ynl_ethtool --set-coalesce "$NSIM_DEV_NAME" rx-usecs 50 2>/dev/null; then
+		ktap_test_fail "YNL ethtool coalesce parameters (set-coalesce command failed unexpectedly)"
+		return
+	fi
+
+	ktap_test_pass "YNL ethtool coalesce parameters (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_pause_params()
+{
+	if ! ip netns exec "$testns" $ynl_ethtool --show-pause "$NSIM_DEV_NAME" &>/dev/null; then
+		ktap_test_fail "YNL ethtool pause parameters (failed to get pause parameters)"
+		return
+	fi
+
+	if ! ip netns exec "$testns" $ynl_ethtool --set-pause "$NSIM_DEV_NAME" tx 1 rx 1 2>/dev/null; then
+		ktap_test_fail "YNL ethtool pause parameters (set-pause command failed unexpectedly)"
+		return
+	fi
+
+	ktap_test_pass "YNL ethtool pause parameters (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_features_info()
+{
+	local features_output
+
+	features_output=$(ip netns exec "$testns" $ynl_ethtool --show-features "$NSIM_DEV_NAME" 2>/dev/null)
+
+	if ! echo "$features_output" | grep -q -E "(Features|offload)"; then
+		ktap_test_fail "YNL ethtool features info (features output missing expected content)"
+		return
+	fi
+
+	ktap_test_pass "YNL ethtool features info (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_channels_info()
+{
+	local channels_output
+
+	channels_output=$(ip netns exec "$testns" $ynl_ethtool --show-channels "$NSIM_DEV_NAME" 2>/dev/null)
+
+	if ! echo "$channels_output" | grep -q -E "(Channel|Combined|RX|TX)"; then
+		ktap_test_fail "YNL ethtool channels info (channels output missing expected content)"
+		return
+	fi
+
+	if ! ip netns exec "$testns" $ynl_ethtool --set-channels "$NSIM_DEV_NAME" combined-count 1 2>/dev/null; then
+		ktap_test_fail "YNL ethtool channels info (set-channels command failed unexpectedly)"
+		return
+	fi
+
+	ktap_test_pass "YNL ethtool channels info (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_time_stamping()
+{
+	local ts_output
+
+	ts_output=$(ip netns exec "$testns" $ynl_ethtool --show-time-stamping "$NSIM_DEV_NAME" 2>/dev/null)
+
+	if ! echo "$ts_output" | grep -q -E "(Time stamping|timestamping|SOF_TIMESTAMPING)"; then
+		ktap_test_fail "YNL ethtool time stamping (time stamping output missing expected content)"
+		return
+	fi
+
+	ktap_test_pass "YNL ethtool time stamping"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+setup()
+{
+	modprobe netdevsim &> /dev/null
+	if ! [ -f /sys/bus/netdevsim/new_device ]; then
+		ktap_skip_all "netdevsim module not available"
+		exit "$KSFT_SKIP"
+	fi
+
+	if ! ip netns add "$testns" 2>/dev/null; then
+		ktap_skip_all "failed to create test namespace"
+		exit "$KSFT_SKIP"
+	fi
+
+	echo "$NSIM_ID 1" | ip netns exec "$testns" tee /sys/bus/netdevsim/new_device >/dev/null 2>&1 || {
+		ktap_skip_all "failed to create netdevsim device"
+		exit "$KSFT_SKIP"
+	}
+
+	local dev
+	dev=$(ip netns exec "$testns" ls /sys/bus/netdevsim/devices/netdevsim$NSIM_ID/net 2>/dev/null | head -1)
+	if [[ -z "$dev" ]]; then
+		ktap_skip_all "failed to find netdevsim device"
+		exit "$KSFT_SKIP"
+	fi
+
+	ip -netns "$testns" link set dev "$dev" name "$NSIM_DEV_NAME" 2>/dev/null || {
+		ktap_skip_all "failed to rename netdevsim device"
+		exit "$KSFT_SKIP"
+	}
+
+	ip -netns "$testns" link set dev "$NSIM_DEV_NAME" up 2>/dev/null
+
+	if ! ip -n "$testns" link add "$VETH_A" type veth peer name "$VETH_B" 2>/dev/null; then
+		ktap_skip_all "failed to create veth pair"
+		exit "$KSFT_SKIP"
+	fi
+
+	ip -n "$testns" link set "$VETH_A" up 2>/dev/null
+	ip -n "$testns" link set "$VETH_B" up 2>/dev/null
+}
+
+cleanup()
+{
+	ip netns exec "$testns" bash -c "echo $NSIM_ID > /sys/bus/netdevsim/del_device" 2>/dev/null || true
+	ip netns del "$testns" 2>/dev/null || true
+}
+
+# Check if ynl-ethtool command is available
+if ! command -v $ynl_ethtool &>/dev/null && [[ ! -x $ynl_ethtool ]]; then
+	ktap_skip_all "ynl-ethtool command not found: $ynl_ethtool"
+	exit "$KSFT_SKIP"
+fi
+
+trap cleanup EXIT
+
+ktap_print_header
+setup
+ktap_set_plan "${TESTS_NO}"
+
+ethtool_device_info
+ethtool_statistics
+ethtool_ring_params
+ethtool_coalesce_params
+ethtool_pause_params
+ethtool_features_info
+ethtool_channels_info
+ethtool_time_stamping
+
+ktap_finished
diff --git a/tools/net/ynl/ynltool/.gitignore b/tools/net/ynl/ynltool/.gitignore
new file mode 100644
index 000000000000..690d399c921a
--- /dev/null
+++ b/tools/net/ynl/ynltool/.gitignore
@@ -0,0 +1,2 @@
+ynltool
+*.d
diff --git a/tools/net/ynl/ynltool/Makefile b/tools/net/ynl/ynltool/Makefile
new file mode 100644
index 000000000000..f5b1de32daa5
--- /dev/null
+++ b/tools/net/ynl/ynltool/Makefile
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+include ../Makefile.deps
+
+INSTALL	?= install
+prefix  ?= /usr
+
+CC := gcc
+CFLAGS := -Wall -Wextra -Werror -O2
+ifeq ("$(DEBUG)","1")
+  CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
+endif
+CFLAGS += -I../lib -I../generated -I../../../include/uapi/
+
+SRC_VERSION := \
+	$(shell make --no-print-directory -sC ../../../.. kernelversion || \
+		echo "unknown")
+
+CFLAGS += -DSRC_VERSION='"$(SRC_VERSION)"'
+
+SRCS := $(wildcard *.c)
+OBJS := $(patsubst %.c,$(OUTPUT)%.o,$(SRCS))
+
+YNLTOOL := $(OUTPUT)ynltool
+
+include $(wildcard *.d)
+
+all: $(YNLTOOL)
+
+Q = @
+
+$(YNLTOOL): ../libynl.a $(OBJS)
+	$(Q)echo -e "\tLINK $@"
+	$(Q)$(CC) $(CFLAGS) -o $@ $(OBJS) ../libynl.a -lm
+
+$(OUTPUT)%.o: %.c ../libynl.a
+	$(Q)echo -e "\tCC $@"
+	$(Q)$(COMPILE.c) -MMD -c -o $@ $<
+
+../libynl.a:
+	$(Q)$(MAKE) -C ../
+
+clean:
+	rm -f *.o *.d *~
+
+distclean: clean
+	rm -f $(YNLTOOL)
+
+bindir ?= $(prefix)/bin
+# $(YNLTOOL) may carry an $(OUTPUT) prefix, so name the installed file explicitly
+install: $(YNLTOOL)
+	$(INSTALL) -m 0755 $(YNLTOOL) $(DESTDIR)$(bindir)/ynltool
+
+.PHONY: all clean distclean install
+.DEFAULT_GOAL=all
diff --git a/tools/net/ynl/ynltool/json_writer.c b/tools/net/ynl/ynltool/json_writer.c
new file mode 100644
index 000000000000..c8685e592cd3
--- /dev/null
+++ b/tools/net/ynl/ynltool/json_writer.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * Simple streaming JSON writer
+ *
+ * This takes care of the annoying bits of JSON syntax like the commas
+ * after elements
+ *
+ * Authors:	Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <malloc.h>
+#include <inttypes.h>
+#include <stdint.h>
+
+#include "json_writer.h"
+
+struct json_writer {
+	FILE		*out;
+	unsigned	depth;
+	bool		pretty;
+	char		sep;
+};
+
+static void jsonw_indent(json_writer_t *self)
+{
+	unsigned i;
+	for (i = 0; i < self->depth; ++i)
+		fputs("    ", self->out);
+}
+
+static void jsonw_eol(json_writer_t *self)
+{
+	if (!self->pretty)
+		return;
+
+	putc('\n', self->out);
+	jsonw_indent(self);
+}
+
+static void jsonw_eor(json_writer_t *self)
+{
+	if (self->sep != '\0')
+		putc(self->sep, self->out);
+	self->sep = ',';
+}
+
+/*
+ * Emit @str as a quoted JSON string. Control characters without a short
+ * escape are written as \u00XX: RFC 8259 forbids them raw inside strings.
+ */
+static void jsonw_puts(json_writer_t *self, const char *str)
+{
+	putc('"', self->out);
+	for (; *str; ++str) {
+		unsigned char c = *str;
+		const char *esc = NULL;
+
+		switch (c) {
+		case '\t': esc = "\\t"; break;
+		case '\n': esc = "\\n"; break;
+		case '\r': esc = "\\r"; break;
+		case '\f': esc = "\\f"; break;
+		case '\b': esc = "\\b"; break;
+		case '\\': esc = "\\\\"; break;
+		case '"': esc = "\\\""; break;
+		default: break;
+		}
+
+		if (esc)
+			fputs(esc, self->out);
+		else if (c < 0x20)
+			fprintf(self->out, "\\u%04x", (unsigned int)c);
+		else
+			putc(c, self->out);
+	}
+	putc('"', self->out);
+}
+
+json_writer_t *jsonw_new(FILE *f)
+{
+	json_writer_t *self = malloc(sizeof(*self));
+	if (self) {
+		self->out = f;
+		self->depth = 0;
+		self->pretty = false;
+		self->sep = '\0';
+	}
+	return self;
+}
+
+void jsonw_destroy(json_writer_t **self_p)
+{
+	json_writer_t *self = *self_p;
+
+	assert(self->depth == 0);
+	fputs("\n", self->out);
+	fflush(self->out);
+	free(self);
+	*self_p = NULL;
+}
+
+void jsonw_pretty(json_writer_t *self, bool on)
+{
+	self->pretty = on;
+}
+
+void jsonw_reset(json_writer_t *self)
+{
+	assert(self->depth == 0);
+	self->sep = '\0';
+}
+
+static void jsonw_begin(json_writer_t *self, int c)
+{
+	jsonw_eor(self);
+	putc(c, self->out);
+	++self->depth;
+	self->sep = '\0';
+}
+
+static void jsonw_end(json_writer_t *self, int c)
+{
+	assert(self->depth > 0);
+
+	--self->depth;
+	if (self->sep != '\0')
+		jsonw_eol(self);
+	putc(c, self->out);
+	self->sep = ',';
+}
+
+void jsonw_name(json_writer_t *self, const char *name)
+{
+	jsonw_eor(self);
+	jsonw_eol(self);
+	self->sep = '\0';
+	jsonw_puts(self, name);
+	putc(':', self->out);
+	if (self->pretty)
+		putc(' ', self->out);
+}
+
+void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap)
+{
+	jsonw_eor(self);
+	putc('"', self->out);
+	vfprintf(self->out, fmt, ap);
+	putc('"', self->out);
+}
+
+void jsonw_printf(json_writer_t *self, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	jsonw_eor(self);
+	vfprintf(self->out, fmt, ap);
+	va_end(ap);
+}
+
+void jsonw_start_object(json_writer_t *self)
+{
+	jsonw_begin(self, '{');
+}
+
+void jsonw_end_object(json_writer_t *self)
+{
+	jsonw_end(self, '}');
+}
+
+void jsonw_start_array(json_writer_t *self)
+{
+	jsonw_begin(self, '[');
+}
+
+void jsonw_end_array(json_writer_t *self)
+{
+	jsonw_end(self, ']');
+}
+
+void jsonw_string(json_writer_t *self, const char *value)
+{
+	jsonw_eor(self);
+	jsonw_puts(self, value);
+}
+
+void jsonw_bool(json_writer_t *self, bool val)
+{
+	jsonw_printf(self, "%s", val ? "true" : "false");
+}
+
+void jsonw_null(json_writer_t *self)
+{
+	jsonw_printf(self, "null");
+}
+
+void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num)
+{
+	jsonw_printf(self, fmt, num);
+}
+
+void jsonw_float(json_writer_t *self, double num)
+{
+	jsonw_printf(self, "%g", num);
+}
+
+void jsonw_hu(json_writer_t *self, unsigned short num)
+{
+	jsonw_printf(self, "%hu", num);
+}
+
+void jsonw_uint(json_writer_t *self, uint64_t num)
+{
+	jsonw_printf(self, "%"PRIu64, num);
+}
+
+void jsonw_lluint(json_writer_t *self, unsigned long long int num)
+{
+	jsonw_printf(self, "%llu", num);
+}
+
+void jsonw_int(json_writer_t *self, int64_t num)
+{
+	jsonw_printf(self, "%"PRId64, num);
+}
+
+void jsonw_string_field(json_writer_t *self, const char *prop, const char *val)
+{
+	jsonw_name(self, prop);
+	jsonw_string(self, val);
+}
+
+void jsonw_bool_field(json_writer_t *self, const char *prop, bool val)
+{
+	jsonw_name(self, prop);
+	jsonw_bool(self, val);
+}
+
+void jsonw_float_field(json_writer_t *self, const char *prop, double val)
+{
+	jsonw_name(self, prop);
+	jsonw_float(self, val);
+}
+
+void jsonw_float_field_fmt(json_writer_t *self,
+			   const char *prop,
+			   const char *fmt,
+			   double val)
+{
+	jsonw_name(self, prop);
+	jsonw_float_fmt(self, fmt, val);
+}
+
+void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num)
+{
+	jsonw_name(self, prop);
+	jsonw_uint(self, num);
+}
+
+void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num)
+{
+	jsonw_name(self, prop);
+	jsonw_hu(self, num);
+}
+
+void jsonw_lluint_field(json_writer_t *self,
+			const char *prop,
+			unsigned long long int num)
+{
+	jsonw_name(self, prop);
+	jsonw_lluint(self, num);
+}
+
+void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num)
+{
+	jsonw_name(self, prop);
+	jsonw_int(self, num);
+}
+
+void jsonw_null_field(json_writer_t *self, const char *prop)
+{
+	jsonw_name(self, prop);
+	jsonw_null(self);
+}
diff --git a/tools/net/ynl/ynltool/json_writer.h b/tools/net/ynl/ynltool/json_writer.h
new file mode 100644
index 000000000000..0f1e63c88f6a
--- /dev/null
+++ b/tools/net/ynl/ynltool/json_writer.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Simple streaming JSON writer
+ *
+ * This takes care of the annoying bits of JSON syntax like the commas
+ * after elements
+ *
+ * Authors:	Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _JSON_WRITER_H_
+#define _JSON_WRITER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+/* Opaque class structure */
+typedef struct json_writer json_writer_t;
+
+/* Create a new JSON stream */
+json_writer_t *jsonw_new(FILE *f);
+/* End output to JSON stream */
+void jsonw_destroy(json_writer_t **self_p);
+
+/* Cause output to have pretty whitespace */
+void jsonw_pretty(json_writer_t *self, bool on);
+
+/* Reset separator to create new JSON */
+void jsonw_reset(json_writer_t *self);
+
+/* Add property name */
+void jsonw_name(json_writer_t *self, const char *name);
+
+/* Add value */
+void __attribute__((format(printf, 2, 0))) jsonw_vprintf_enquote(json_writer_t *self,
+								 const char *fmt,
+								 va_list ap);
+void __attribute__((format(printf, 2, 3))) jsonw_printf(json_writer_t *self,
+							const char *fmt, ...);
+void jsonw_string(json_writer_t *self, const char *value);
+void jsonw_bool(json_writer_t *self, bool value);
+void jsonw_float(json_writer_t *self, double number);
+void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num);
+void jsonw_uint(json_writer_t *self, uint64_t number);
+void jsonw_hu(json_writer_t *self, unsigned short number);
+void jsonw_int(json_writer_t *self, int64_t number);
+void jsonw_null(json_writer_t *self);
+void jsonw_lluint(json_writer_t *self, unsigned long long int num);
+
+/* Useful Combinations of name and value */
+void jsonw_string_field(json_writer_t *self, const char *prop, const char *val);
+void jsonw_bool_field(json_writer_t *self, const char *prop, bool value);
+void jsonw_float_field(json_writer_t *self, const char *prop, double num);
+void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num);
+void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num);
+void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num);
+void jsonw_null_field(json_writer_t *self, const char *prop);
+void jsonw_lluint_field(json_writer_t *self, const char *prop,
+			unsigned long long int num);
+void jsonw_float_field_fmt(json_writer_t *self, const char *prop,
+			   const char *fmt, double val);
+
+/* Collections */
+void jsonw_start_object(json_writer_t *self);
+void jsonw_end_object(json_writer_t *self);
+
+void jsonw_start_array(json_writer_t *self);
+void jsonw_end_array(json_writer_t *self);
+
+/* Override default exception handling */
+typedef void (jsonw_err_handler_fn)(const char *);
+
+#endif /* _JSON_WRITER_H_ */
diff --git a/tools/net/ynl/ynltool/main.c b/tools/net/ynl/ynltool/main.c
new file mode 100644
index 000000000000..5d0f428eed0a
--- /dev/null
+++ b/tools/net/ynl/ynltool/main.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
+/* Copyright Meta Platforms, Inc. and affiliates */
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include "main.h"
+
+const char *bin_name;
+static int last_argc;
+static char **last_argv;
+static int (*last_do_help)(int argc, char **argv);
+json_writer_t *json_wtr;
+bool pretty_output;
+bool json_output;
+
+static void __attribute__((noreturn)) clean_and_exit(int i)
+{
+	if (json_output)
+		jsonw_destroy(&json_wtr);
+
+	exit(i);
+}
+
+void usage(void)
+{
+	last_do_help(last_argc - 1, last_argv + 1);
+
+	clean_and_exit(-1);
+}
+
+static int do_help(int argc __attribute__((unused)),
+		   char **argv __attribute__((unused)))
+{
+	if (json_output) {
+		jsonw_null(json_wtr);
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Usage: %s [OPTIONS] OBJECT { COMMAND | help }\n"
+		"       %s version\n"
+		"\n"
+		"       OBJECT := { page-pool | qstats }\n"
+		"       " HELP_SPEC_OPTIONS "\n"
+		"",
+		bin_name, bin_name);
+
+	return 0;
+}
+
+static int do_version(int argc __attribute__((unused)),
+		      char **argv __attribute__((unused)))
+{
+	if (json_output) {
+		jsonw_start_object(json_wtr);
+		/* Emit the version as a string value, not as a printf format. */
+		jsonw_string_field(json_wtr, "version", SRC_VERSION);
+		jsonw_end_object(json_wtr);
+	} else {
+		printf("%s " SRC_VERSION "\n", bin_name);
+	}
+	return 0;
+}
+
+static const struct cmd commands[] = {
+	{ "help",	do_help },
+	{ "page-pool",	do_page_pool },
+	{ "qstats",	do_qstats },
+	{ "version",	do_version },
+	{ 0 }
+};
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+	       int (*help)(int argc, char **argv))
+{
+	unsigned int i;
+
+	last_argc = argc;
+	last_argv = argv;
+	last_do_help = help;
+
+	if (argc < 1 && cmds[0].func)
+		return cmds[0].func(argc, argv);
+
+	for (i = 0; cmds[i].cmd; i++) {
+		if (is_prefix(*argv, cmds[i].cmd)) {
+			if (!cmds[i].func) {
+				p_err("command '%s' is not available", cmds[i].cmd);
+				return -1;
+			}
+			return cmds[i].func(argc - 1, argv + 1);
+		}
+	}
+
+	help(argc - 1, argv + 1);
+
+	return -1;
+}
+
+bool is_prefix(const char *pfx, const char *str)
+{
+	/* A NULL prefix (no argument left) never matches. */
+	if (!pfx)
+		return false;
+
+	/* strncmp() stops at str's NUL, so a shorter str cannot match. */
+	return strncmp(str, pfx, strlen(pfx)) == 0;
+}
+
+/* The last argument MUST be a NULL pointer; it terminates the candidates. */
+int detect_common_prefix(const char *arg, ...)
+{
+	unsigned int count = 0;
+	const char *ref;
+	char msg[256];
+	va_list ap;
+
+	snprintf(msg, sizeof(msg), "ambiguous prefix: '%s' could be '", arg);
+	va_start(ap, arg);
+	while ((ref = va_arg(ap, const char *))) {
+		if (!is_prefix(arg, ref))
+			continue;
+		count++;
+		if (count > 1)
+			strncat(msg, "' or '", sizeof(msg) - strlen(msg) - 1);
+		strncat(msg, ref, sizeof(msg) - strlen(msg) - 1);
+	}
+	va_end(ap);
+	strncat(msg, "'", sizeof(msg) - strlen(msg) - 1);
+
+	if (count >= 2) {
+		p_err("%s", msg);
+		return -1;
+	}
+
+	return 0;
+}
+
+void p_err(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	if (json_output) {
+		jsonw_start_object(json_wtr);
+		jsonw_name(json_wtr, "error");
+		jsonw_vprintf_enquote(json_wtr, fmt, ap);
+		jsonw_end_object(json_wtr);
+	} else {
+		fprintf(stderr, "Error: ");
+		vfprintf(stderr, fmt, ap);
+		fprintf(stderr, "\n");
+	}
+	va_end(ap);
+}
+
+void p_info(const char *fmt, ...)
+{
+	va_list args;
+
+	/* Informational messages are suppressed entirely in JSON mode. */
+	if (!json_output) {
+		va_start(args, fmt);
+		vfprintf(stderr, fmt, args);
+		fputc('\n', stderr);
+		va_end(args);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	static const struct option options[] = {
+		{ "json",	no_argument,	NULL,	'j' },
+		{ "help",	no_argument,	NULL,	'h' },
+		{ "pretty",	no_argument,	NULL,	'p' },
+		{ "version",	no_argument,	NULL,	'V' },
+		{ 0 }
+	};
+	bool version_requested = false;
+	int opt, ret;
+
+	setlinebuf(stdout);
+
+	last_do_help = do_help;
+	pretty_output = false;
+	json_output = false;
+	bin_name = "ynltool";
+
+	opterr = 0;	/* unknown options are reported through p_err() below */
+	while ((opt = getopt_long(argc, argv, "Vhjp",
+				  options, NULL)) >= 0) {
+		switch (opt) {
+		case 'V':
+			version_requested = true;
+			break;
+		case 'h':	/* exit path also ends a JSON stream opened by -j/-p */
+			clean_and_exit(do_help(argc, argv));
+		case 'p':
+			pretty_output = true;
+			/* fall through */
+		case 'j':
+			if (!json_output) {
+				json_wtr = jsonw_new(stdout);
+				if (!json_wtr) {
+					p_err("failed to create JSON writer");
+					return -1;
+				}
+				json_output = true;
+			}
+			jsonw_pretty(json_wtr, pretty_output);
+			break;
+		default:
+			p_err("unrecognized option '%s'", argv[optind - 1]);
+			if (json_output)
+				clean_and_exit(-1);
+			else
+				usage();
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+	if (argc < 0)
+		usage();
+
+	if (version_requested)
+		ret = do_version(argc, argv);
+	else
+		ret = cmd_select(commands, argc, argv, do_help);
+
+	if (json_output)
+		jsonw_destroy(&json_wtr);
+
+	return ret;
+}
diff --git a/tools/net/ynl/ynltool/main.h b/tools/net/ynl/ynltool/main.h
new file mode 100644
index 000000000000..c7039f9ac55a
--- /dev/null
+++ b/tools/net/ynl/ynltool/main.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
+/* Copyright Meta Platforms, Inc. and affiliates */
+
+#ifndef __YNLTOOL_H
+#define __YNLTOOL_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "json_writer.h"
+
+#define NEXT_ARG()	({ argc--; argv++; if (argc < 0) usage(); })
+#define NEXT_ARGP()	({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
+#define BAD_ARG()	({ p_err("what is '%s'?", *argv); -1; })
+#define GET_ARG()	({ argc--; *argv++; })
+#define REQ_ARGS(cnt)							\
+	({								\
+		int _cnt = (cnt);					\
+		bool _res;						\
+									\
+		if (argc < _cnt) {					\
+			p_err("'%s' needs at least %d arguments, %d found", \
+			      argv[-1], _cnt, argc);			\
+			_res = false;					\
+		} else {						\
+			_res = true;					\
+		}							\
+		_res;							\
+	})
+
+#define HELP_SPEC_OPTIONS						\
+	"OPTIONS := { {-j|--json} [{-p|--pretty}] }"
+
+extern const char *bin_name;
+
+extern json_writer_t *json_wtr;
+extern bool json_output;
+extern bool pretty_output;
+
+void __attribute__((format(printf, 1, 2))) p_err(const char *fmt, ...);
+void __attribute__((format(printf, 1, 2))) p_info(const char *fmt, ...);
+
+bool is_prefix(const char *pfx, const char *str);
+int detect_common_prefix(const char *arg, ...);
+void usage(void) __attribute__((noreturn));
+
+struct cmd {
+	const char *cmd;
+	int (*func)(int argc, char **argv);
+};
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+	       int (*help)(int argc, char **argv));
+
+/* subcommands */
+int do_page_pool(int argc, char **argv);
+int do_qstats(int argc, char **argv);
+
+#endif /* __YNLTOOL_H */
diff --git a/tools/net/ynl/ynltool/page-pool.c b/tools/net/ynl/ynltool/page-pool.c
new file mode 100644
index 000000000000..4b24492abab7
--- /dev/null
+++ b/tools/net/ynl/ynltool/page-pool.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <net/if.h>
+
+#include <ynl.h>
+#include "netdev-user.h"
+
+#include "main.h"
+
+struct pp_stat {
+	unsigned int ifc;
+
+	struct {
+		unsigned int cnt;
+		size_t refs, bytes;
+	} live[2];
+
+	size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache;
+};
+
+struct pp_stats_array {
+	unsigned int i, max;
+	struct pp_stat *s;
+};
+
+static struct pp_stat *find_ifc(struct pp_stats_array *a, unsigned int ifindex)
+{
+	unsigned int i;
+
+	for (i = 0; i < a->i; i++) {
+		if (a->s[i].ifc == ifindex)
+			return &a->s[i];
+	}
+
+	if (++a->i == a->max) {		/* grow early: slot i must stay in bounds */
+		a->max *= 2;
+		a->s = reallocarray(a->s, a->max, sizeof(*a->s));
+	}
+	memset(&a->s[i], 0, sizeof(a->s[i]));	/* grown memory is not zeroed */
+	a->s[i].ifc = ifindex;
+	return &a->s[i];
+}
+
+static void count_pool(struct pp_stat *s, unsigned int l,
+		       struct netdev_page_pool_get_rsp *pp)
+{
+	s->live[l].cnt++;
+	if (pp->_present.inflight)
+		s->live[l].refs += pp->inflight;
+	if (pp->_present.inflight_mem)
+		s->live[l].bytes += pp->inflight_mem;
+}
+
+/* We don't know how many pages are sitting in cache and ring
+ * so we will under-count the recycling rate a bit.
+ */
+static void print_json_recycling_stats(struct pp_stat *s)
+{
+	double recycle;
+
+	if (s->alloc_fast + s->alloc_slow) {
+		recycle = (double)(s->recycle_ring + s->recycle_cache) /
+			(s->alloc_fast + s->alloc_slow) * 100;
+		jsonw_float_field(json_wtr, "recycling_pct", recycle);
+	}
+
+	jsonw_name(json_wtr, "alloc");
+	jsonw_start_object(json_wtr);
+	jsonw_uint_field(json_wtr, "slow", s->alloc_slow);
+	jsonw_uint_field(json_wtr, "fast", s->alloc_fast);
+	jsonw_end_object(json_wtr);
+
+	jsonw_name(json_wtr, "recycle");
+	jsonw_start_object(json_wtr);
+	jsonw_uint_field(json_wtr, "ring", s->recycle_ring);
+	jsonw_uint_field(json_wtr, "cache", s->recycle_cache);
+	jsonw_end_object(json_wtr);
+}
+
+static void print_plain_recycling_stats(struct pp_stat *s)
+{
+	size_t allocs = s->alloc_fast + s->alloc_slow;
+
+	/* No allocations recorded: there is no rate to report. */
+	if (!allocs)
+		return;
+
+	printf("recycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)",
+	       (double)(s->recycle_ring + s->recycle_cache) / allocs * 100,
+	       s->alloc_slow, s->alloc_fast, s->recycle_ring, s->recycle_cache);
+}
+
+static void print_json_stats(struct pp_stats_array *a)
+{
+	jsonw_start_array(json_wtr);
+
+	for (unsigned int i = 0; i < a->i; i++) {
+		char ifname[IF_NAMESIZE];
+		struct pp_stat *s = &a->s[i];
+		const char *name;
+
+		jsonw_start_object(json_wtr);
+
+		if (!s->ifc) {
+			jsonw_string_field(json_wtr, "ifname", "<orphan>");
+			jsonw_uint_field(json_wtr, "ifindex", 0);
+		} else {
+			name = if_indextoname(s->ifc, ifname);
+			if (name)
+				jsonw_string_field(json_wtr, "ifname", name);
+			jsonw_uint_field(json_wtr, "ifindex", s->ifc);
+		}
+
+		jsonw_uint_field(json_wtr, "page_pools", s->live[1].cnt);
+		jsonw_uint_field(json_wtr, "zombies", s->live[0].cnt);
+
+		jsonw_name(json_wtr, "live");
+		jsonw_start_object(json_wtr);
+		jsonw_uint_field(json_wtr, "refs", s->live[1].refs);
+		jsonw_uint_field(json_wtr, "bytes", s->live[1].bytes);
+		jsonw_end_object(json_wtr);
+
+		jsonw_name(json_wtr, "zombie");
+		jsonw_start_object(json_wtr);
+		jsonw_uint_field(json_wtr, "refs", s->live[0].refs);
+		jsonw_uint_field(json_wtr, "bytes", s->live[0].bytes);
+		jsonw_end_object(json_wtr);
+
+		if (s->alloc_fast || s->alloc_slow)
+			print_json_recycling_stats(s);
+
+		jsonw_end_object(json_wtr);
+	}
+
+	jsonw_end_array(json_wtr);
+}
+
+static void print_plain_stats(struct pp_stats_array *a)
+{
+	for (unsigned int i = 0; i < a->i; i++) {
+		char ifname[IF_NAMESIZE];
+		struct pp_stat *s = &a->s[i];
+		const char *name;
+
+		if (!s->ifc) {
+			printf("<orphan>\t");
+		} else {
+			name = if_indextoname(s->ifc, ifname);
+			if (name)
+				printf("%8s", name);
+			printf("[%u]\t", s->ifc);
+		}
+
+		printf("page pools: %u (zombies: %u)\n",
+		       s->live[1].cnt, s->live[0].cnt);
+		printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n",
+		       s->live[1].refs, s->live[1].bytes,
+		       s->live[0].refs, s->live[0].bytes);
+
+		if (s->alloc_fast || s->alloc_slow) {
+			printf("\t\t");
+			print_plain_recycling_stats(s);
+			printf("\n");
+		}
+	}
+}
+
+static bool
+find_pool_stat_in_list(struct netdev_page_pool_stats_get_list *pp_stats,
+		       __u64 pool_id, struct pp_stat *pstat)
+{
+	ynl_dump_foreach(pp_stats, pp) {
+		if (!pp->_present.info || !pp->info._present.id)
+			continue;
+		if (pp->info.id != pool_id)
+			continue;
+
+		memset(pstat, 0, sizeof(*pstat));
+		if (pp->_present.alloc_fast)
+			pstat->alloc_fast = pp->alloc_fast;
+		if (pp->_present.alloc_refill)
+			pstat->alloc_fast += pp->alloc_refill;
+		if (pp->_present.alloc_slow)
+			pstat->alloc_slow = pp->alloc_slow;
+		if (pp->_present.recycle_ring)
+			pstat->recycle_ring = pp->recycle_ring;
+		if (pp->_present.recycle_cached)
+			pstat->recycle_cache = pp->recycle_cached;
+		return true;
+	}
+	return false;
+}
+
+static void
+print_json_pool_list(struct netdev_page_pool_get_list *pools,
+		     struct netdev_page_pool_stats_get_list *pp_stats,
+		     bool zombies_only)
+{
+	jsonw_start_array(json_wtr);
+
+	ynl_dump_foreach(pools, pp) {
+		char ifname[IF_NAMESIZE];
+		struct pp_stat pstat;
+		const char *name;
+
+		if (zombies_only && !pp->_present.detach_time)
+			continue;
+
+		jsonw_start_object(json_wtr);
+
+		jsonw_uint_field(json_wtr, "id", pp->id);
+
+		if (pp->_present.ifindex) {
+			name = if_indextoname(pp->ifindex, ifname);
+			if (name)
+				jsonw_string_field(json_wtr, "ifname", name);
+			jsonw_uint_field(json_wtr, "ifindex", pp->ifindex);
+		}
+
+		if (pp->_present.napi_id)
+			jsonw_uint_field(json_wtr, "napi_id", pp->napi_id);
+
+		if (pp->_present.inflight)
+			jsonw_uint_field(json_wtr, "refs", pp->inflight);
+
+		if (pp->_present.inflight_mem)
+			jsonw_uint_field(json_wtr, "bytes", pp->inflight_mem);
+
+		if (pp->_present.detach_time)
+			jsonw_uint_field(json_wtr, "detach_time", pp->detach_time);
+
+		if (pp->_present.dmabuf)
+			jsonw_uint_field(json_wtr, "dmabuf", pp->dmabuf);
+
+		if (find_pool_stat_in_list(pp_stats, pp->id, &pstat) &&
+		    (pstat.alloc_fast || pstat.alloc_slow))
+			print_json_recycling_stats(&pstat);
+
+		jsonw_end_object(json_wtr);
+	}
+
+	jsonw_end_array(json_wtr);
+}
+
+static void
+print_plain_pool_list(struct netdev_page_pool_get_list *pools,
+		      struct netdev_page_pool_stats_get_list *pp_stats,
+		      bool zombies_only)
+{
+	ynl_dump_foreach(pools, pp) {
+		char ifname[IF_NAMESIZE];
+		struct pp_stat pstat;
+		const char *name;
+
+		if (zombies_only && !pp->_present.detach_time)
+			continue;
+
+		printf("pool id: %llu", pp->id);
+
+		if (pp->_present.ifindex) {
+			name = if_indextoname(pp->ifindex, ifname);
+			if (name)
+				printf("  dev: %s", name);
+			printf("[%u]", pp->ifindex);
+		}
+
+		if (pp->_present.napi_id)
+			printf("  napi: %llu", pp->napi_id);
+
+		printf("\n");
+
+		if (pp->_present.inflight || pp->_present.inflight_mem) {
+			printf("  inflight:");
+			if (pp->_present.inflight)
+				printf(" %llu pages", pp->inflight);
+			if (pp->_present.inflight_mem)
+				printf(" %llu bytes", pp->inflight_mem);
+			printf("\n");
+		}
+
+		if (pp->_present.detach_time)
+			printf("  detached: %llu\n", pp->detach_time);
+
+		if (pp->_present.dmabuf)
+			printf("  dmabuf: %u\n", pp->dmabuf);
+
+		if (find_pool_stat_in_list(pp_stats, pp->id, &pstat) &&
+		    (pstat.alloc_fast || pstat.alloc_slow)) {
+			printf("  ");
+			print_plain_recycling_stats(&pstat);
+			printf("\n");
+		}
+	}
+}
+
+static void aggregate_device_stats(struct pp_stats_array *a,
+				   struct netdev_page_pool_get_list *pools,
+				   struct netdev_page_pool_stats_get_list *pp_stats)
+{
+	ynl_dump_foreach(pools, pp) {
+		struct pp_stat *s = find_ifc(a, pp->ifindex);
+
+		count_pool(s, 1, pp);
+		if (pp->_present.detach_time)
+			count_pool(s, 0, pp);
+	}
+
+	ynl_dump_foreach(pp_stats, pp) {
+		struct pp_stat *s = find_ifc(a, pp->info.ifindex);
+
+		if (pp->_present.alloc_fast)
+			s->alloc_fast += pp->alloc_fast;
+		if (pp->_present.alloc_refill)
+			s->alloc_fast += pp->alloc_refill;
+		if (pp->_present.alloc_slow)
+			s->alloc_slow += pp->alloc_slow;
+		if (pp->_present.recycle_ring)
+			s->recycle_ring += pp->recycle_ring;
+		if (pp->_present.recycle_cached)
+			s->recycle_cache += pp->recycle_cached;
+	}
+}
+
+static int do_stats(int argc, char **argv)
+{
+	struct netdev_page_pool_stats_get_list *pp_stats;
+	struct netdev_page_pool_get_list *pools;
+	enum {
+		GROUP_BY_DEVICE,
+		GROUP_BY_POOL,
+	} group_by = GROUP_BY_DEVICE;
+	bool zombies_only = false;
+	struct pp_stats_array a = {};
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int ret = 0;
+
+	/* Parse options */
+	while (argc > 0) {
+		if (is_prefix(*argv, "group-by")) {
+			NEXT_ARG();
+
+			if (!REQ_ARGS(1))
+				return -1;
+
+			if (is_prefix(*argv, "device")) {
+				group_by = GROUP_BY_DEVICE;
+			} else if (is_prefix(*argv, "pp") ||
+				   is_prefix(*argv, "page-pool") ||
+				   is_prefix(*argv, "none")) {
+				group_by = GROUP_BY_POOL;
+			} else {
+				p_err("invalid group-by value '%s'", *argv);
+				return -1;
+			}
+			NEXT_ARG();
+		} else if (is_prefix(*argv, "zombies")) {
+			zombies_only = true;
+			group_by = GROUP_BY_POOL;
+			NEXT_ARG();
+		} else {
+			p_err("unknown option '%s'", *argv);
+			return -1;
+		}
+	}
+
+	ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+	if (!ys) {
+		p_err("YNL: %s", yerr.msg);
+		return -1;
+	}
+
+	pools = netdev_page_pool_get_dump(ys);
+	if (!pools) {
+		p_err("failed to get page pools: %s", ys->err.msg);
+		ret = -1;
+		goto exit_close;
+	}
+
+	pp_stats = netdev_page_pool_stats_get_dump(ys);
+	if (!pp_stats) {
+		p_err("failed to get page pool stats: %s", ys->err.msg);
+		ret = -1;
+		goto exit_free_pp_list;
+	}
+
+	/* If grouping by pool, print individual pools */
+	if (group_by == GROUP_BY_POOL) {
+		if (json_output)
+			print_json_pool_list(pools, pp_stats, zombies_only);
+		else
+			print_plain_pool_list(pools, pp_stats, zombies_only);
+	} else {
+		/* Aggregated stats mode (group-by device) */
+		a.max = 64;
+		a.s = calloc(a.max, sizeof(*a.s));
+		if (!a.s) {
+			p_err("failed to allocate stats array");
+			ret = -1;
+			goto exit_free_stats_list;
+		}
+
+		aggregate_device_stats(&a, pools, pp_stats);
+
+		if (json_output)
+			print_json_stats(&a);
+		else
+			print_plain_stats(&a);
+
+		free(a.s);
+	}
+
+exit_free_stats_list:
+	netdev_page_pool_stats_get_list_free(pp_stats);
+exit_free_pp_list:
+	netdev_page_pool_get_list_free(pools);
+exit_close:
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+static int do_help(int argc __attribute__((unused)),
+		   char **argv __attribute__((unused)))
+{
+	if (json_output) {
+		jsonw_null(json_wtr);
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Usage: %s page-pool { COMMAND | help }\n"
+		"       %s page-pool stats [ OPTIONS ]\n"
+		"\n"
+		"       OPTIONS := { group-by { device | page-pool | none } | zombies }\n"
+		"\n"
+		"       stats                   - Display page pool statistics\n"
+		"       stats group-by device   - Group statistics by network device (default)\n"
+		"       stats group-by page-pool | pp | none\n"
+		"                               - Show individual page pool details (no grouping)\n"
+		"       stats zombies           - Show only zombie page pools (detached but with\n"
+		"                                 pages in flight). Implies group-by page-pool.\n"
+		"",
+		bin_name, bin_name);
+
+	return 0;
+}
+
+static const struct cmd page_pool_cmds[] = {
+	{ "help",	do_help },
+	{ "stats",	do_stats },
+	{ 0 }
+};
+
+int do_page_pool(int argc, char **argv)
+{
+	return cmd_select(page_pool_cmds, argc, argv, do_help);
+}
diff --git a/tools/net/ynl/ynltool/qstats.c b/tools/net/ynl/ynltool/qstats.c
new file mode 100644
index 000000000000..31fb45709ffa
--- /dev/null
+++ b/tools/net/ynl/ynltool/qstats.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <net/if.h>
+#include <math.h>
+
+#include <ynl.h>
+#include "netdev-user.h"
+
+#include "main.h"
+
+static enum netdev_qstats_scope scope; /* default - device */
+
+struct queue_balance {
+	unsigned int ifindex;
+	enum netdev_queue_type type;
+	unsigned int queue_count;
+	__u64 *rx_packets;
+	__u64 *rx_bytes;
+	__u64 *tx_packets;
+	__u64 *tx_bytes;
+};
+
+static void print_json_qstats(struct netdev_qstats_get_list *qstats)
+{
+	jsonw_start_array(json_wtr);
+
+	ynl_dump_foreach(qstats, qs) {
+		char ifname[IF_NAMESIZE];
+		const char *name;
+
+		jsonw_start_object(json_wtr);
+
+		name = if_indextoname(qs->ifindex, ifname);
+		if (name)
+			jsonw_string_field(json_wtr, "ifname", name);
+		jsonw_uint_field(json_wtr, "ifindex", qs->ifindex);
+
+		if (qs->_present.queue_type)
+			jsonw_string_field(json_wtr, "queue-type",
+					   netdev_queue_type_str(qs->queue_type));
+		if (qs->_present.queue_id)
+			jsonw_uint_field(json_wtr, "queue-id", qs->queue_id);
+
+		if (qs->_present.rx_packets || qs->_present.rx_bytes ||
+		    qs->_present.rx_alloc_fail || qs->_present.rx_hw_drops ||
+		    qs->_present.rx_csum_complete || qs->_present.rx_hw_gro_packets) {
+			jsonw_name(json_wtr, "rx");
+			jsonw_start_object(json_wtr);
+			if (qs->_present.rx_packets)
+				jsonw_uint_field(json_wtr, "packets", qs->rx_packets);
+			if (qs->_present.rx_bytes)
+				jsonw_uint_field(json_wtr, "bytes", qs->rx_bytes);
+			if (qs->_present.rx_alloc_fail)
+				jsonw_uint_field(json_wtr, "alloc-fail", qs->rx_alloc_fail);
+			if (qs->_present.rx_hw_drops)
+				jsonw_uint_field(json_wtr, "hw-drops", qs->rx_hw_drops);
+			if (qs->_present.rx_hw_drop_overruns)
+				jsonw_uint_field(json_wtr, "hw-drop-overruns", qs->rx_hw_drop_overruns);
+			if (qs->_present.rx_hw_drop_ratelimits)
+				jsonw_uint_field(json_wtr, "hw-drop-ratelimits", qs->rx_hw_drop_ratelimits);
+			if (qs->_present.rx_csum_complete)
+				jsonw_uint_field(json_wtr, "csum-complete", qs->rx_csum_complete);
+			if (qs->_present.rx_csum_unnecessary)
+				jsonw_uint_field(json_wtr, "csum-unnecessary", qs->rx_csum_unnecessary);
+			if (qs->_present.rx_csum_none)
+				jsonw_uint_field(json_wtr, "csum-none", qs->rx_csum_none);
+			if (qs->_present.rx_csum_bad)
+				jsonw_uint_field(json_wtr, "csum-bad", qs->rx_csum_bad);
+			if (qs->_present.rx_hw_gro_packets)
+				jsonw_uint_field(json_wtr, "hw-gro-packets", qs->rx_hw_gro_packets);
+			if (qs->_present.rx_hw_gro_bytes)
+				jsonw_uint_field(json_wtr, "hw-gro-bytes", qs->rx_hw_gro_bytes);
+			if (qs->_present.rx_hw_gro_wire_packets)
+				jsonw_uint_field(json_wtr, "hw-gro-wire-packets", qs->rx_hw_gro_wire_packets);
+			if (qs->_present.rx_hw_gro_wire_bytes)
+				jsonw_uint_field(json_wtr, "hw-gro-wire-bytes", qs->rx_hw_gro_wire_bytes);
+			jsonw_end_object(json_wtr);
+		}
+
+		if (qs->_present.tx_packets || qs->_present.tx_bytes ||
+		    qs->_present.tx_hw_drops || qs->_present.tx_csum_none ||
+		    qs->_present.tx_hw_gso_packets) {
+			jsonw_name(json_wtr, "tx");
+			jsonw_start_object(json_wtr);
+			if (qs->_present.tx_packets)
+				jsonw_uint_field(json_wtr, "packets", qs->tx_packets);
+			if (qs->_present.tx_bytes)
+				jsonw_uint_field(json_wtr, "bytes", qs->tx_bytes);
+			if (qs->_present.tx_hw_drops)
+				jsonw_uint_field(json_wtr, "hw-drops", qs->tx_hw_drops);
+			if (qs->_present.tx_hw_drop_errors)
+				jsonw_uint_field(json_wtr, "hw-drop-errors", qs->tx_hw_drop_errors);
+			if (qs->_present.tx_hw_drop_ratelimits)
+				jsonw_uint_field(json_wtr, "hw-drop-ratelimits", qs->tx_hw_drop_ratelimits);
+			if (qs->_present.tx_csum_none)
+				jsonw_uint_field(json_wtr, "csum-none", qs->tx_csum_none);
+			if (qs->_present.tx_needs_csum)
+				jsonw_uint_field(json_wtr, "needs-csum", qs->tx_needs_csum);
+			if (qs->_present.tx_hw_gso_packets)
+				jsonw_uint_field(json_wtr, "hw-gso-packets", qs->tx_hw_gso_packets);
+			if (qs->_present.tx_hw_gso_bytes)
+				jsonw_uint_field(json_wtr, "hw-gso-bytes", qs->tx_hw_gso_bytes);
+			if (qs->_present.tx_hw_gso_wire_packets)
+				jsonw_uint_field(json_wtr, "hw-gso-wire-packets", qs->tx_hw_gso_wire_packets);
+			if (qs->_present.tx_hw_gso_wire_bytes)
+				jsonw_uint_field(json_wtr, "hw-gso-wire-bytes", qs->tx_hw_gso_wire_bytes);
+			if (qs->_present.tx_stop)
+				jsonw_uint_field(json_wtr, "stop", qs->tx_stop);
+			if (qs->_present.tx_wake)
+				jsonw_uint_field(json_wtr, "wake", qs->tx_wake);
+			jsonw_end_object(json_wtr);
+		}
+
+		jsonw_end_object(json_wtr);
+	}
+
+	jsonw_end_array(json_wtr);
+}
+
+static void print_one(bool present, const char *name, unsigned long long val,
+		      int *line)
+{
+	if (!present)
+		return;
+
+	if (!*line) {	/* fresh row: indent so it counts as the first column */
+		printf("              ");
+		++(*line);
+	}
+
+	/* Don't waste space on the tx-/rx- prefix, it's implied by queue type */
+	if (scope == NETDEV_QSTATS_SCOPE_QUEUE &&
+	    (name[0] == 'r' || name[0] == 't') &&
+	    name[1] == 'x' && name[2] == '-')
+		name += 3;
+
+	printf(" %15s: %15llu", name, val);
+
+	if (++(*line) == 3) {	/* two counters per row, then wrap */
+		printf("\n");
+		*line = 0;
+	}
+}
+
+static void print_plain_qstats(struct netdev_qstats_get_list *qstats)
+{
+	ynl_dump_foreach(qstats, qs) {
+		char ifname[IF_NAMESIZE];
+		const char *name;
+		int n;
+
+		name = if_indextoname(qs->ifindex, ifname);
+		if (name)
+			printf("%s", name);
+		else
+			printf("ifindex:%u", qs->ifindex);
+
+		if (qs->_present.queue_type && qs->_present.queue_id)
+			printf("\t%s-%-3u",
+			       netdev_queue_type_str(qs->queue_type),
+			       qs->queue_id);
+		else
+			printf("\t      ");
+
+		n = 1;
+
+		/* Basic counters */
+		print_one(qs->_present.rx_packets, "rx-packets", qs->rx_packets, &n);
+		print_one(qs->_present.rx_bytes, "rx-bytes", qs->rx_bytes, &n);
+		print_one(qs->_present.tx_packets, "tx-packets", qs->tx_packets, &n);
+		print_one(qs->_present.tx_bytes, "tx-bytes", qs->tx_bytes, &n);
+
+		/* RX error/drop counters */
+		print_one(qs->_present.rx_alloc_fail, "rx-alloc-fail",
+			  qs->rx_alloc_fail, &n);
+		print_one(qs->_present.rx_hw_drops, "rx-hw-drops",
+			  qs->rx_hw_drops, &n);
+		print_one(qs->_present.rx_hw_drop_overruns, "rx-hw-drop-overruns",
+			  qs->rx_hw_drop_overruns, &n);
+		print_one(qs->_present.rx_hw_drop_ratelimits, "rx-hw-drop-ratelimits",
+			  qs->rx_hw_drop_ratelimits, &n);
+
+		/* RX checksum counters */
+		print_one(qs->_present.rx_csum_complete, "rx-csum-complete",
+			  qs->rx_csum_complete, &n);
+		print_one(qs->_present.rx_csum_unnecessary, "rx-csum-unnecessary",
+			  qs->rx_csum_unnecessary, &n);
+		print_one(qs->_present.rx_csum_none, "rx-csum-none",
+			  qs->rx_csum_none, &n);
+		print_one(qs->_present.rx_csum_bad, "rx-csum-bad",
+			  qs->rx_csum_bad, &n);
+
+		/* RX GRO counters */
+		print_one(qs->_present.rx_hw_gro_packets, "rx-hw-gro-packets",
+			  qs->rx_hw_gro_packets, &n);
+		print_one(qs->_present.rx_hw_gro_bytes, "rx-hw-gro-bytes",
+			  qs->rx_hw_gro_bytes, &n);
+		print_one(qs->_present.rx_hw_gro_wire_packets, "rx-hw-gro-wire-packets",
+			  qs->rx_hw_gro_wire_packets, &n);
+		print_one(qs->_present.rx_hw_gro_wire_bytes, "rx-hw-gro-wire-bytes",
+			  qs->rx_hw_gro_wire_bytes, &n);
+
+		/* TX error/drop counters */
+		print_one(qs->_present.tx_hw_drops, "tx-hw-drops",
+			  qs->tx_hw_drops, &n);
+		print_one(qs->_present.tx_hw_drop_errors, "tx-hw-drop-errors",
+			  qs->tx_hw_drop_errors, &n);
+		print_one(qs->_present.tx_hw_drop_ratelimits, "tx-hw-drop-ratelimits",
+			  qs->tx_hw_drop_ratelimits, &n);
+
+		/* TX checksum counters */
+		print_one(qs->_present.tx_csum_none, "tx-csum-none",
+			  qs->tx_csum_none, &n);
+		print_one(qs->_present.tx_needs_csum, "tx-needs-csum",
+			  qs->tx_needs_csum, &n);
+
+		/* TX GSO counters */
+		print_one(qs->_present.tx_hw_gso_packets, "tx-hw-gso-packets",
+			  qs->tx_hw_gso_packets, &n);
+		print_one(qs->_present.tx_hw_gso_bytes, "tx-hw-gso-bytes",
+			  qs->tx_hw_gso_bytes, &n);
+		print_one(qs->_present.tx_hw_gso_wire_packets, "tx-hw-gso-wire-packets",
+			  qs->tx_hw_gso_wire_packets, &n);
+		print_one(qs->_present.tx_hw_gso_wire_bytes, "tx-hw-gso-wire-bytes",
+			  qs->tx_hw_gso_wire_bytes, &n);
+
+		/* TX queue control */
+		print_one(qs->_present.tx_stop, "tx-stop", qs->tx_stop, &n);
+		print_one(qs->_present.tx_wake, "tx-wake", qs->tx_wake, &n);
+
+		if (n)
+			printf("\n");
+	}
+}
+
+static int do_show(int argc, char **argv)
+{
+	struct netdev_qstats_get_list *qstats;
+	struct netdev_qstats_get_req *req;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int ret = 0;
+
+	/* Parse options */
+	while (argc > 0) {
+		if (is_prefix(*argv, "scope") || is_prefix(*argv, "group-by")) {
+			NEXT_ARG();
+
+			if (!REQ_ARGS(1))
+				return -1;
+
+			if (is_prefix(*argv, "queue")) {
+				scope = NETDEV_QSTATS_SCOPE_QUEUE;
+			} else if (is_prefix(*argv, "device")) {
+				scope = 0;
+			} else {
+				p_err("invalid scope value '%s'", *argv);
+				return -1;
+			}
+			NEXT_ARG();
+		} else {
+			p_err("unknown option '%s'", *argv);
+			return -1;
+		}
+	}
+
+	ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+	if (!ys) {
+		p_err("YNL: %s", yerr.msg);
+		return -1;
+	}
+
+	req = netdev_qstats_get_req_alloc();
+	if (!req) {
+		p_err("failed to allocate qstats request");
+		ret = -1;
+		goto exit_close;
+	}
+
+	if (scope)
+		netdev_qstats_get_req_set_scope(req, scope);
+
+	qstats = netdev_qstats_get_dump(ys, req);
+	netdev_qstats_get_req_free(req);
+	if (!qstats) {
+		p_err("failed to get queue stats: %s", ys->err.msg);
+		ret = -1;
+		goto exit_close;
+	}
+
+	/* Print the stats as returned by the kernel */
+	if (json_output)
+		print_json_qstats(qstats);
+	else
+		print_plain_qstats(qstats);
+
+	netdev_qstats_get_list_free(qstats);
+exit_close:
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/* Summarize @count samples: min, max, mean and sample standard deviation. */
+static void compute_stats(__u64 *values, unsigned int count,
+			  double *mean, double *stddev, __u64 *min, __u64 *max)
+{
+	double total = 0.0, sq_dev = 0.0, avg;
+	__u64 lo = ~0ULL, hi = 0;
+	unsigned int k;
+
+	/* With no samples every output is reported as zero */
+	if (!count) {
+		*min = *max = 0;
+		*mean = *stddev = 0;
+		return;
+	}
+
+	for (k = 0; k < count; k++) {
+		__u64 v = values[k];
+
+		total += v;
+		lo = v < lo ? v : lo;
+		hi = v > hi ? v : hi;
+	}
+	avg = total / count;
+
+	/* Squared deviations are only needed when there are 2+ samples */
+	for (k = 0; count > 1 && k < count; k++) {
+		double delta = values[k] - avg;
+
+		sq_dev += delta * delta;
+	}
+
+	*min = lo;
+	*max = hi;
+	*mean = avg;
+	/* n - 1 divisor (sample deviation); a single sample yields zero */
+	*stddev = count > 1 ? sqrt(sq_dev / (count - 1)) : 0;
+}
+
+/* Plain-text balance report for one metric; r* names need an RX type, t* names a TX type. */
+static void print_balance_stats(const char *name, enum netdev_queue_type type,
+				__u64 *values, unsigned int count)
+{
+	double avg, dev, cv = 0.0, ns = 0.0;
+	__u64 lo, hi;
+
+	if ((name[0] == 'r' && type != NETDEV_QUEUE_TYPE_RX) ||
+	    (name[0] == 't' && type != NETDEV_QUEUE_TYPE_TX))
+		return;
+
+	compute_stats(values, count, &avg, &dev, &lo, &hi);
+	if (avg > 0)
+		cv = (dev / avg) * 100.0;	/* stddev as a percentage of the mean */
+	if (lo + hi > 0)
+		ns = (double)2 * (hi - lo) / (hi + lo) * 100;	/* range vs. midpoint, % */
+
+	printf("  %-12s: cv=%.1f%% ns=%.1f%% stddev=%.0f\n", name, cv, ns, dev);
+	printf("  %-12s  min=%llu max=%llu mean=%.0f\n", "", lo, hi, avg);
+}
+
+/* JSON balance object for one metric; r* names need an RX type, t* names a TX type. */
+static void print_balance_stats_json(const char *name, enum netdev_queue_type type,
+				     __u64 *values, unsigned int count)
+{
+	double avg, dev, cv, ns;
+	__u64 lo, hi;
+
+	if ((name[0] == 'r' && type != NETDEV_QUEUE_TYPE_RX) ||
+	    (name[0] == 't' && type != NETDEV_QUEUE_TYPE_TX))
+		return;
+
+	compute_stats(values, count, &avg, &dev, &lo, &hi);
+
+	cv = avg > 0 ? (dev / avg) * 100.0 : 0.0;
+	ns = lo + hi > 0 ? (double)2 * (hi - lo) / (hi + lo) * 100 : 0.0;
+
+	jsonw_name(json_wtr, name);
+	jsonw_start_object(json_wtr);
+	jsonw_uint_field(json_wtr, "queue-count", count);
+	jsonw_uint_field(json_wtr, "min", lo);
+	jsonw_uint_field(json_wtr, "max", hi);
+	jsonw_float_field(json_wtr, "mean", avg);
+	jsonw_float_field(json_wtr, "stddev", dev);
+	jsonw_float_field(json_wtr, "coefficient-of-variation", cv);
+	jsonw_float_field(json_wtr, "normalized-spread", ns);
+	jsonw_end_object(json_wtr);
+}
+
+/* qsort() comparator; a and b point at slots of an array of rsp pointers, not at an rsp. */
+static int cmp_ifindex_type(const void *a, const void *b)
+{
+	const struct netdev_qstats_get_rsp *qa = *(void *const *)a, *qb = *(void *const *)b;
+
+	if (qa->ifindex != qb->ifindex)
+		return qa->ifindex - qb->ifindex;
+	if (qa->queue_type != qb->queue_type)
+		return qa->queue_type - qb->queue_type;
+	return qa->queue_id - qb->queue_id;
+}
+
+/* "balance" subcommand: report how evenly traffic spreads over each device's queues. */
+static int do_balance(int argc, char **argv __attribute__((unused)))
+{
+	struct netdev_qstats_get_list *qstats;
+	struct netdev_qstats_get_req *req;
+	struct netdev_qstats_get_rsp **sorted = NULL;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	unsigned int count = 0;
+	unsigned int i, j;
+	int ret = 0;
+
+	if (argc > 0) {
+		p_err("balance command takes no arguments");
+		return -1;
+	}
+
+	ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+	if (!ys) {
+		p_err("YNL: %s", yerr.msg);
+		return -1;
+	}
+
+	req = netdev_qstats_get_req_alloc();
+	if (!req) {
+		p_err("failed to allocate qstats request");
+		ret = -1;
+		goto exit_close;
+	}
+
+	/* Always use queue scope for balance analysis */
+	netdev_qstats_get_req_set_scope(req, NETDEV_QSTATS_SCOPE_QUEUE);
+
+	qstats = netdev_qstats_get_dump(ys, req);
+	netdev_qstats_get_req_free(req);
+	if (!qstats) {
+		p_err("failed to get queue stats: %s", ys->err.msg);
+		ret = -1;
+		goto exit_close;
+	}
+
+	/* Count the dumped entries; they are collected and sorted below */
+	ynl_dump_foreach(qstats, qs)
+		count++;
+
+	if (count == 0) {
+		if (json_output)
+			jsonw_start_array(json_wtr);
+		else
+			printf("No queue statistics available\n");
+		goto exit_free_sorted;	/* sorted is NULL; closes the JSON array */
+	}
+
+	sorted = calloc(count, sizeof(*sorted));
+	if (!sorted) {
+		p_err("failed to allocate sorted array");
+		ret = -1;
+		goto exit_free_qstats;
+	}
+
+	i = 0;
+	ynl_dump_foreach(qstats, qs)
+		sorted[i++] = qs;
+
+	qsort(sorted, count, sizeof(*sorted), cmp_ifindex_type);
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+
+	/* Process each device/queue-type combination */
+	i = 0;
+	while (i < count) {
+		__u64 *rx_packets, *rx_bytes, *tx_packets, *tx_bytes;
+		enum netdev_queue_type type = sorted[i]->queue_type;
+		unsigned int ifindex = sorted[i]->ifindex;
+		unsigned int queue_count = 0;
+		char ifname[IF_NAMESIZE];
+		const char *name;
+
+		/* Count queues for this device/type */
+		for (j = i; j < count && sorted[j]->ifindex == ifindex &&
+		     sorted[j]->queue_type == type; j++)
+			queue_count++;
+
+		/* Skip the group if its first queue reports none of the four counters */
+		if (!sorted[i]->_present.rx_packets &&
+		    !sorted[i]->_present.rx_bytes &&
+		    !sorted[i]->_present.tx_packets &&
+		    !sorted[i]->_present.tx_bytes)
+			goto next_ifc;
+
+		/* Allocate arrays for statistics */
+		rx_packets = calloc(queue_count, sizeof(*rx_packets));
+		rx_bytes   = calloc(queue_count, sizeof(*rx_bytes));
+		tx_packets = calloc(queue_count, sizeof(*tx_packets));
+		tx_bytes   = calloc(queue_count, sizeof(*tx_bytes));
+
+		if (!rx_packets || !rx_bytes || !tx_packets || !tx_bytes) {
+			p_err("failed to allocate statistics arrays");
+			free(rx_packets);
+			free(rx_bytes);
+			free(tx_packets);
+			free(tx_bytes);
+			ret = -1;
+			goto exit_free_sorted;
+		}
+
+		/* Collect statistics; a counter that is not present counts as 0 */
+		for (j = 0; j < queue_count; j++) {
+			rx_packets[j] = sorted[i + j]->_present.rx_packets ?
+					sorted[i + j]->rx_packets : 0;
+			rx_bytes[j] = sorted[i + j]->_present.rx_bytes ?
+				      sorted[i + j]->rx_bytes : 0;
+			tx_packets[j] = sorted[i + j]->_present.tx_packets ?
+					sorted[i + j]->tx_packets : 0;
+			tx_bytes[j] = sorted[i + j]->_present.tx_bytes ?
+				      sorted[i + j]->tx_bytes : 0;
+		}
+
+		name = if_indextoname(ifindex, ifname);
+
+		if (json_output) {
+			jsonw_start_object(json_wtr);
+			if (name)
+				jsonw_string_field(json_wtr, "ifname", name);
+			jsonw_uint_field(json_wtr, "ifindex", ifindex);
+			jsonw_string_field(json_wtr, "queue-type",
+					   netdev_queue_type_str(type));
+
+			print_balance_stats_json("rx-packets", type,
+						 rx_packets, queue_count);
+			print_balance_stats_json("rx-bytes", type,
+						 rx_bytes, queue_count);
+			print_balance_stats_json("tx-packets", type,
+						 tx_packets, queue_count);
+			print_balance_stats_json("tx-bytes", type,
+						 tx_bytes, queue_count);
+
+			jsonw_end_object(json_wtr);
+		} else {
+			if (name)
+				printf("%s", name);
+			else
+				printf("ifindex:%u", ifindex);
+			printf(" %s %u queues:\n",
+			       netdev_queue_type_str(type), queue_count);
+
+			print_balance_stats("rx-packets", type,
+					    rx_packets, queue_count);
+			print_balance_stats("rx-bytes", type,
+					    rx_bytes, queue_count);
+			print_balance_stats("tx-packets", type,
+					    tx_packets, queue_count);
+			print_balance_stats("tx-bytes", type,
+					    tx_bytes, queue_count);
+			printf("\n");
+		}
+
+		free(rx_packets);
+		free(rx_bytes);
+		free(tx_packets);
+		free(tx_bytes);
+
+next_ifc:
+		i += queue_count;
+	}
+
+exit_free_sorted:	/* reached by every path that opened the JSON array */
+	if (json_output)
+		jsonw_end_array(json_wtr);
+	free(sorted);
+exit_free_qstats:
+	netdev_qstats_get_list_free(qstats);
+exit_close:
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/* "help" subcommand: usage text on stderr, or a bare JSON null in JSON mode. */
+static int do_help(int argc __attribute__((unused)),
+		   char **argv __attribute__((unused)))
+{
+	if (json_output) {
+		jsonw_null(json_wtr);
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Usage: %s qstats { COMMAND | help }\n"
+		"       %s qstats [ show ] [ OPTIONS ]\n"
+		"       %s qstats balance\n"
+		"\n"
+		"       OPTIONS := { scope queue | group-by { device | queue } }\n"
+		"\n"
+		"       show                  - Display queue statistics (default)\n"
+		"                               Statistics are aggregated for the entire device.\n"
+		"       show scope queue      - Display per-queue statistics\n"
+		"       show group-by device  - Display device-aggregated statistics (default)\n"
+		"       show group-by queue   - Display per-queue statistics\n"
+		"       balance               - Analyze traffic distribution balance.\n",
+		bin_name, bin_name, bin_name);
+
+	return 0;
+}
+
+static const struct cmd qstats_cmds[] = {	/* "qstats" subcommand name -> handler */
+	{ "show",	do_show },
+	{ "balance",	do_balance },
+	{ "help",	do_help },
+	{ 0 }	/* presumably the end-of-table marker for cmd_select() - confirm */
+};
+
+int do_qstats(int argc, char **argv)	/* entry point for the "qstats" command */
+{
+	return cmd_select(qstats_cmds, argc, argv, do_help);	/* NOTE(review): role of the extra do_help argument is defined by cmd_select() */
+}
diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
index 4faa4dd72f35..73d883128511 100644
--- a/tools/objtool/.gitignore
+++ b/tools/objtool/.gitignore
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
+arch/x86/lib/cpu-feature-names.c
 arch/x86/lib/inat-tables.c
 /objtool
+feature
+FEATURE-DUMP.objtool
 fixdep
 libsubcmd/
diff --git a/tools/objtool/Build b/tools/objtool/Build
index a3cdf8af6635..9982e665d58d 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -8,8 +8,11 @@ objtool-y += builtin-check.o
 objtool-y += elf.o
 objtool-y += objtool.o
 
-objtool-$(BUILD_ORC) += orc_gen.o
-objtool-$(BUILD_ORC) += orc_dump.o
+objtool-$(BUILD_DISAS) += disas.o
+objtool-$(BUILD_DISAS) += trace.o
+
+objtool-$(BUILD_ORC) += orc_gen.o orc_dump.o
+objtool-$(BUILD_KLP) += builtin-klp.o klp-diff.o klp-post-link.o
 
 objtool-y += libstring.o
 objtool-y += libctype.o
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 8c20361dd100..ad6e1ec706ce 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -2,6 +2,28 @@
 include ../scripts/Makefile.include
 include ../scripts/Makefile.arch
 
+ifeq ($(SRCARCH),x86)
+	BUILD_ORC    := y
+	ARCH_HAS_KLP := y
+endif
+
+ifeq ($(SRCARCH),loongarch)
+	BUILD_ORC	   := y
+endif
+
+ifeq ($(ARCH_HAS_KLP),y)
+	HAVE_XXHASH = $(shell printf "$(pound)include <xxhash.h>\nXXH3_state_t *state;int main() {}" | \
+		      $(HOSTCC) -xc - -o /dev/null -lxxhash 2> /dev/null && echo y || echo n)
+	ifeq ($(HAVE_XXHASH),y)
+		BUILD_KLP	 := y
+		LIBXXHASH_CFLAGS := $(shell $(HOSTPKG_CONFIG) libxxhash --cflags 2>/dev/null) \
+				    -DBUILD_KLP
+		LIBXXHASH_LIBS   := $(shell $(HOSTPKG_CONFIG) libxxhash --libs 2>/dev/null || echo -lxxhash)
+	endif
+endif
+
+export BUILD_ORC BUILD_KLP
+
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(CURDIR)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
@@ -23,6 +45,11 @@ LIBELF_LIBS  := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lel
 
 all: $(OBJTOOL)
 
+WARNINGS := -Werror -Wall -Wextra -Wmissing-prototypes			\
+	    -Wmissing-declarations -Wwrite-strings			\
+	    -Wno-implicit-fallthrough -Wno-sign-compare			\
+	    -Wno-unused-parameter
+
 INCLUDES := -I$(srctree)/tools/include \
 	    -I$(srctree)/tools/include/uapi \
 	    -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
@@ -30,11 +57,11 @@ INCLUDES := -I$(srctree)/tools/include \
 	    -I$(srctree)/tools/objtool/include \
 	    -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include \
 	    -I$(LIBSUBCMD_OUTPUT)/include
-# Note, EXTRA_WARNINGS here was determined for CC and not HOSTCC, it
-# is passed here to match a legacy behavior.
-WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
-OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
-OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
+
+OBJTOOL_CFLAGS  := -std=gnu11 -fomit-frame-pointer -O2 -g $(WARNINGS)	\
+		   $(INCLUDES) $(LIBELF_FLAGS) $(LIBXXHASH_CFLAGS) $(HOSTCFLAGS)
+
+OBJTOOL_LDFLAGS := $(LIBSUBCMD) $(LIBELF_LIBS) $(LIBXXHASH_LIBS) $(HOSTLDFLAGS)
 
 # Allow old libelf to be used:
 elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(HOSTCC) $(OBJTOOL_CFLAGS) -x c -E - 2>/dev/null | grep elf_getshdr)
@@ -43,20 +70,32 @@ OBJTOOL_CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
 # Always want host compilation.
 HOST_OVERRIDES := CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)"
 
-AWK = awk
-MKDIR = mkdir
+#
+# To support disassembly, objtool needs libopcodes which is provided
+# with libbfd (binutils-dev or binutils-devel package).
+#
+FEATURE_USER = .objtool
+FEATURE_TESTS = libbfd disassembler-init-styled
+FEATURE_DISPLAY =
+include $(srctree)/tools/build/Makefile.feature
+
+ifeq ($(feature-disassembler-init-styled), 1)
+	OBJTOOL_CFLAGS += -DDISASM_INIT_STYLED
+endif
 
-BUILD_ORC := n
+BUILD_DISAS := n
 
-ifeq ($(SRCARCH),x86)
-	BUILD_ORC := y
+ifeq ($(feature-libbfd),1)
+	BUILD_DISAS := y
+	OBJTOOL_CFLAGS += -DDISAS -DPACKAGE="objtool"
+	OBJTOOL_LDFLAGS += -lopcodes
 endif
 
-ifeq ($(SRCARCH),loongarch)
-	BUILD_ORC := y
-endif
+export BUILD_DISAS
+
+AWK = awk
+MKDIR = mkdir
 
-export BUILD_ORC
 export srctree OUTPUT CFLAGS SRCARCH AWK
 include $(srctree)/tools/build/Makefile.include
 
@@ -86,7 +125,10 @@ $(LIBSUBCMD)-clean:
 clean: $(LIBSUBCMD)-clean
 	$(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL)
 	$(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+	$(Q)$(RM) $(OUTPUT)arch/x86/lib/cpu-feature-names.c $(OUTPUT)fixdep
 	$(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep
+	$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.objtool
+	$(Q)$(RM) -r -- $(OUTPUT)feature
 
 FORCE:
 
diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c
index 2e555c4060c5..6cd288150f49 100644
--- a/tools/objtool/arch/loongarch/decode.c
+++ b/tools/objtool/arch/loongarch/decode.c
@@ -1,13 +1,25 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include <string.h>
 #include <objtool/check.h>
+#include <objtool/disas.h>
 #include <objtool/warn.h>
 #include <asm/inst.h>
 #include <asm/orc_types.h>
 #include <linux/objtool_types.h>
 #include <arch/elf.h>
 
-int arch_ftrace_match(char *name)
+const char *arch_reg_name[CFI_NUM_REGS] = {	/* NOTE(review): presumably indexed by CFI register number - confirm */
+	"zero", "ra", "tp", "sp",
+	"a0", "a1", "a2", "a3",
+	"a4", "a5", "a6", "a7",
+	"t0", "t1", "t2", "t3",
+	"t4", "t5", "t6", "t7",
+	"t8", "u0", "fp", "s0",
+	"s1", "s2", "s3", "s4",
+	"s5", "s6", "s7", "s8"
+};
+
+int arch_ftrace_match(const char *name)
 {
 	return !strcmp(name, "_mcount");
 }
@@ -17,9 +29,9 @@ unsigned long arch_jump_destination(struct instruction *insn)
 	return insn->offset + (insn->immediate << 2);
 }
 
-unsigned long arch_dest_reloc_offset(int addend)
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc)
 {
-	return addend;
+	return reloc_addend(reloc);
 }
 
 bool arch_pc_relative_reloc(struct reloc *reloc)
@@ -414,3 +426,14 @@ unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *tabl
 		return reloc->sym->offset + reloc_addend(reloc);
 	}
 }
+
+#ifdef DISAS
+
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+	return disas_info_init(dinfo, bfd_arch_loongarch,	/* BFD architecture id */
+			       bfd_mach_loongarch32, bfd_mach_loongarch64,	/* 32-bit and 64-bit machine ids */
+			       NULL);	/* NOTE(review): presumably "no disassembler options" - confirm in disas_info_init() */
+}
+
+#endif /* DISAS */
diff --git a/tools/objtool/arch/loongarch/orc.c b/tools/objtool/arch/loongarch/orc.c
index b58c5ff443c9..ffd3a3c858ae 100644
--- a/tools/objtool/arch/loongarch/orc.c
+++ b/tools/objtool/arch/loongarch/orc.c
@@ -5,7 +5,6 @@
 #include <objtool/check.h>
 #include <objtool/orc.h>
 #include <objtool/warn.h>
-#include <objtool/endianness.h>
 
 int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruction *insn)
 {
diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c
index a80b75f7b061..aba774109437 100644
--- a/tools/objtool/arch/loongarch/special.c
+++ b/tools/objtool/arch/loongarch/special.c
@@ -194,3 +194,8 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
 
 	return rodata_reloc;
 }
+
+const char *arch_cpu_feature_name(int feature_number)
+{
+	return NULL;	/* stub: no feature name is returned for any number */
+}
diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index c851c51d4bd3..e534ac1123b3 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -3,20 +3,32 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <objtool/check.h>
+#include <objtool/disas.h>
 #include <objtool/elf.h>
 #include <objtool/arch.h>
 #include <objtool/warn.h>
 #include <objtool/builtin.h>
-#include <objtool/endianness.h>
 
-int arch_ftrace_match(char *name)
+const char *arch_reg_name[CFI_NUM_REGS] = {	/* NOTE(review): presumably indexed by CFI register number - confirm */
+	"r0",  "sp",  "r2",  "r3",
+	"r4",  "r5",  "r6",  "r7",
+	"r8",  "r9",  "r10", "r11",
+	"r12", "r13", "r14", "r15",
+	"r16", "r17", "r18", "r19",
+	"r20", "r21", "r22", "r23",
+	"r24", "r25", "r26", "r27",
+	"r28", "r29", "r30", "r31",
+	"ra"
+};
+
+int arch_ftrace_match(const char *name)
 {
 	return !strcmp(name, "_mcount");
 }
 
-unsigned long arch_dest_reloc_offset(int addend)
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc)
 {
-	return addend;
+	return reloc_addend(reloc);
 }
 
 bool arch_callee_saved_reg(unsigned char reg)
@@ -128,3 +140,14 @@ unsigned int arch_reloc_size(struct reloc *reloc)
 		return 8;
 	}
 }
+
+#ifdef DISAS
+
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+	return disas_info_init(dinfo, bfd_arch_powerpc,	/* BFD architecture id */
+			       bfd_mach_ppc, bfd_mach_ppc64,	/* 32-bit and 64-bit machine ids */
+			       NULL);	/* NOTE(review): presumably "no disassembler options" - confirm in disas_info_init() */
+}
+
+#endif /* DISAS */
diff --git a/tools/objtool/arch/powerpc/special.c b/tools/objtool/arch/powerpc/special.c
index 51610689abf7..8f9bf61ca089 100644
--- a/tools/objtool/arch/powerpc/special.c
+++ b/tools/objtool/arch/powerpc/special.c
@@ -18,3 +18,8 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
 {
 	exit(-1);
 }
+
+const char *arch_cpu_feature_name(int feature_number)
+{
+	return NULL;	/* stub: no feature name is returned for any number */
+}
diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
index 3dedb2fd8f3a..febee0b8ee0b 100644
--- a/tools/objtool/arch/x86/Build
+++ b/tools/objtool/arch/x86/Build
@@ -1,5 +1,5 @@
-objtool-y += special.o
 objtool-y += decode.o
+objtool-y += special.o
 objtool-y += orc.o
 
 inat_tables_script = ../arch/x86/tools/gen-insn-attr-x86.awk
@@ -12,3 +12,14 @@ $(OUTPUT)arch/x86/lib/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
 $(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/lib/inat-tables.c
 
 CFLAGS_decode.o += -I$(OUTPUT)arch/x86/lib
+
+cpu_features = ../arch/x86/include/asm/cpufeatures.h
+cpu_features_script = ../arch/x86/tools/gen-cpu-feature-names-x86.awk
+
+$(OUTPUT)arch/x86/lib/cpu-feature-names.c: $(cpu_features_script) $(cpu_features)
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,gen)$(AWK) -f $(cpu_features_script) $(cpu_features) > $@
+
+$(OUTPUT)arch/x86/special.o: $(OUTPUT)arch/x86/lib/cpu-feature-names.c
+
+CFLAGS_special.o += -I$(OUTPUT)arch/x86/lib
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 0ad5cc70ecbe..f4af82508228 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -16,14 +16,22 @@
 
 #include <asm/orc_types.h>
 #include <objtool/check.h>
+#include <objtool/disas.h>
 #include <objtool/elf.h>
 #include <objtool/arch.h>
 #include <objtool/warn.h>
-#include <objtool/endianness.h>
 #include <objtool/builtin.h>
 #include <arch/elf.h>
 
-int arch_ftrace_match(char *name)
+const char *arch_reg_name[CFI_NUM_REGS] = {	/* NOTE(review): presumably indexed by CFI_* number - confirm the order matches */
+	"rax", "rcx", "rdx", "rbx",
+	"rsp", "rbp", "rsi", "rdi",
+	"r8",  "r9",  "r10", "r11",
+	"r12", "r13", "r14", "r15",
+	"ra"
+};
+
+int arch_ftrace_match(const char *name)
 {
 	return !strcmp(name, "__fentry__");
 }
@@ -68,9 +76,65 @@ bool arch_callee_saved_reg(unsigned char reg)
 	}
 }
 
-unsigned long arch_dest_reloc_offset(int addend)
+/* Undo the effects of __pa_symbol() if necessary; zero and negative values pass through */
+static unsigned long phys_to_virt(unsigned long pa)
+{
+	s64 va = pa;	/* signed copy so the sign test below is meaningful */
+
+	if (va > 0)
+		va &= ~(0x80000000);	/* with 32-bit int the literal is unsigned: mask is 0x7fffffff */
+
+	return va;
+}
+
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc)
+{
+	s64 addend = reloc_addend(reloc);	/* start from the raw relocation addend */
+
+	if (arch_pc_relative_reloc(reloc))
+		addend += insn->offset + insn->len - reloc_offset(reloc);	/* bytes from the reloc to the end of insn */
+
+	return phys_to_virt(addend);
+}
+
+static void scan_for_insn(struct section *sec, unsigned long offset,
+			  unsigned long *insn_off, unsigned int *insn_len)
 {
-	return addend + 4;
+	unsigned long o = 0;
+	struct insn insn;
+
+	while (1) {
+
+		insn_decode(&insn, sec->data->d_buf + o, sec_size(sec) - o,
+			    INSN_MODE_64);
+
+		if (o + insn.length > offset) {
+			*insn_off = o;
+			*insn_len = insn.length;
+			return;
+		}
+
+		o += insn.length;
+	}
+}
+
+u64 arch_adjusted_addend(struct reloc *reloc)
+{
+	unsigned int type = reloc_type(reloc);
+	s64 addend = reloc_addend(reloc);
+	unsigned long insn_off;
+	unsigned int insn_len;
+
+	if (type == R_X86_64_PLT32)
+		return addend + 4;
+
+	if (type != R_X86_64_PC32 || !is_text_sec(reloc->sec->base))
+		return addend;
+
+	scan_for_insn(reloc->sec->base, reloc_offset(reloc),
+		      &insn_off, &insn_len);
+
+	return addend + insn_off + insn_len - reloc_offset(reloc);
 }
 
 unsigned long arch_jump_destination(struct instruction *insn)
@@ -189,15 +253,6 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 	op2 = ins.opcode.bytes[1];
 	op3 = ins.opcode.bytes[2];
 
-	/*
-	 * XXX hack, decoder is buggered and thinks 0xea is 7 bytes long.
-	 */
-	if (op1 == 0xea) {
-		insn->len = 1;
-		insn->type = INSN_BUG;
-		return 0;
-	}
-
 	if (ins.rex_prefix.nbytes) {
 		rex = ins.rex_prefix.bytes[0];
 		rex_w = X86_REX_W(rex) >> 3;
@@ -503,6 +558,12 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		break;
 
 	case 0x90:
+		if (rex_b) /* XCHG %r8, %rax */
+			break;
+
+		if (prefix == 0xf3) /* REP NOP := PAUSE */
+			break;
+
 		insn->type = INSN_NOP;
 		break;
 
@@ -556,13 +617,14 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 
 		} else if (op2 == 0x0b || op2 == 0xb9) {
 
-			/* ud2 */
+			/* ud2, ud1 */
 			insn->type = INSN_BUG;
 
-		} else if (op2 == 0x0d || op2 == 0x1f) {
+		} else if (op2 == 0x1f) {
 
-			/* nopl/nopw */
-			insn->type = INSN_NOP;
+			/* 0f 1f /0 := NOPL */
+			if (modrm_reg == 0)
+				insn->type = INSN_NOP;
 
 		} else if (op2 == 0x1e) {
 
@@ -692,6 +754,10 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 		insn->type = INSN_SYSRET;
 		break;
 
+	case 0xd6: /* udb */
+		insn->type = INSN_BUG;
+		break;
+
 	case 0xe0: /* loopne */
 	case 0xe1: /* loope */
 	case 0xe2: /* loop */
@@ -892,3 +958,14 @@ bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
 		return false;
 	}
 }
+
+#ifdef DISAS
+
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+	return disas_info_init(dinfo, bfd_arch_i386,	/* BFD architecture id */
+			       bfd_mach_i386_i386, bfd_mach_x86_64,	/* 32-bit and 64-bit machine ids */
+			       "att");	/* NOTE(review): presumably a disassembler option string (AT&T syntax) - confirm */
+}
+
+#endif /* DISAS */
diff --git a/tools/objtool/arch/x86/orc.c b/tools/objtool/arch/x86/orc.c
index 7176b9ec5b05..735e150ca6b7 100644
--- a/tools/objtool/arch/x86/orc.c
+++ b/tools/objtool/arch/x86/orc.c
@@ -5,7 +5,6 @@
 #include <objtool/check.h>
 #include <objtool/orc.h>
 #include <objtool/warn.h>
-#include <objtool/endianness.h>
 
 int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruction *insn)
 {
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 06ca4a2659a4..e817a3fff449 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -4,6 +4,10 @@
 #include <objtool/special.h>
 #include <objtool/builtin.h>
 #include <objtool/warn.h>
+#include <asm/cpufeatures.h>
+
+/* cpu feature name array generated from cpufeatures.h */
+#include "cpu-feature-names.c"
 
 void arch_handle_alternative(struct special_alt *alt)
 {
@@ -89,7 +93,7 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
 	/* look for a relocation which references .rodata */
 	text_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
 					      insn->offset, insn->len);
-	if (!text_reloc || text_reloc->sym->type != STT_SECTION ||
+	if (!text_reloc || !is_sec_sym(text_reloc->sym) ||
 	    !text_reloc->sym->sec->rodata)
 		return NULL;
 
@@ -134,3 +138,9 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
 	*table_size = 0;
 	return rodata_reloc;
 }
+
+const char *arch_cpu_feature_name(int feature_number)
+{
+	return (feature_number < ARRAY_SIZE(cpu_feature_names)) ?	/* bounds check against the table */
+		cpu_feature_names[feature_number] : NULL;	/* NULL when past the end of the table */
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 0f6b197cfcb0..b780df513715 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -73,35 +73,41 @@ static int parse_hacks(const struct option *opt, const char *str, int unset)
 
 static const struct option check_options[] = {
 	OPT_GROUP("Actions:"),
+	OPT_BOOLEAN(0,		 "checksum", &opts.checksum, "generate per-function checksums"),
+	OPT_BOOLEAN(0,		 "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"),
+	OPT_STRING_OPTARG('d',	 "disas", &opts.disas, "function-pattern", "disassemble functions", "*"),
 	OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr,skylake", "patch toolchain bugs/limitations", parse_hacks),
-	OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
-	OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
-	OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
-	OPT_BOOLEAN(0,   "orc", &opts.orc, "generate ORC metadata"),
-	OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
-	OPT_BOOLEAN(0,   "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
-	OPT_BOOLEAN(0,   "unret", &opts.unret, "validate entry unret placement"),
-	OPT_INTEGER(0,   "prefix", &opts.prefix, "generate prefix symbols"),
-	OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
-	OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
-	OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
-	OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
-	OPT_BOOLEAN(0  , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"),
-	OPT_BOOLEAN(0  , "noabs", &opts.noabs, "reject absolute references in allocatable sections"),
-	OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
+	OPT_BOOLEAN('i',	 "ibt", &opts.ibt, "validate and annotate IBT"),
+	OPT_BOOLEAN('m',	 "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
+	OPT_BOOLEAN(0,		 "noabs", &opts.noabs, "reject absolute references in allocatable sections"),
+	OPT_BOOLEAN('n',	 "noinstr", &opts.noinstr, "validate noinstr rules"),
+	OPT_BOOLEAN(0,		 "orc", &opts.orc, "generate ORC metadata"),
+	OPT_BOOLEAN('r',	 "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
+	OPT_BOOLEAN(0,		 "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
+	OPT_BOOLEAN(0,		 "unret", &opts.unret, "validate entry unret placement"),
+	OPT_INTEGER(0,		 "prefix", &opts.prefix, "generate prefix symbols"),
+	OPT_BOOLEAN('l',	 "sls", &opts.sls, "validate straight-line-speculation mitigations"),
+	OPT_BOOLEAN('s',	 "stackval", &opts.stackval, "validate frame pointer rules"),
+	OPT_BOOLEAN('t',	 "static-call", &opts.static_call, "annotate static calls"),
+	OPT_BOOLEAN('u',	 "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
+	OPT_CALLBACK_OPTARG(0,	 "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
 
 	OPT_GROUP("Options:"),
-	OPT_BOOLEAN(0,   "backtrace", &opts.backtrace, "unwind on error"),
-	OPT_BOOLEAN(0,   "dry-run", &opts.dryrun, "don't write modifications"),
-	OPT_BOOLEAN(0,   "link", &opts.link, "object is a linked object"),
-	OPT_BOOLEAN(0,   "module", &opts.module, "object is part of a kernel module"),
-	OPT_BOOLEAN(0,   "mnop", &opts.mnop, "nop out mcount call sites"),
-	OPT_BOOLEAN(0,   "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
-	OPT_STRING('o',  "output", &opts.output, "file", "output file name"),
-	OPT_BOOLEAN(0,   "sec-address", &opts.sec_address, "print section addresses in warnings"),
-	OPT_BOOLEAN(0,   "stats", &opts.stats, "print statistics"),
-	OPT_BOOLEAN('v', "verbose", &opts.verbose, "verbose warnings"),
-	OPT_BOOLEAN(0,   "Werror", &opts.werror, "return error on warnings"),
+	OPT_BOOLEAN(0,		 "backtrace", &opts.backtrace, "unwind on error"),
+	OPT_BOOLEAN(0,		 "backup", &opts.backup, "create backup (.orig) file on warning/error"),
+	OPT_STRING(0,		 "debug-checksum", &opts.debug_checksum,  "funcs", "enable checksum debug output"),
+	OPT_BOOLEAN(0,		 "dry-run", &opts.dryrun, "don't write modifications"),
+	OPT_BOOLEAN(0,		 "link", &opts.link, "object is a linked object"),
+	OPT_BOOLEAN(0,		 "module", &opts.module, "object is part of a kernel module"),
+	OPT_BOOLEAN(0,		 "mnop", &opts.mnop, "nop out mcount call sites"),
+	OPT_BOOLEAN(0,		 "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
+	OPT_STRING('o',		 "output", &opts.output, "file", "output file name"),
+	OPT_BOOLEAN(0,		 "sec-address", &opts.sec_address, "print section addresses in warnings"),
+	OPT_BOOLEAN(0,		 "stats", &opts.stats, "print statistics"),
+	OPT_STRING(0,		 "trace", &opts.trace, "func", "trace function validation"),
+	OPT_BOOLEAN('v',	 "verbose", &opts.verbose, "verbose warnings"),
+	OPT_BOOLEAN(0,		 "werror", &opts.werror, "return error on warnings"),
+	OPT_BOOLEAN(0,		 "wide", &opts.wide, "wide output"),
 
 	OPT_END(),
 };
@@ -159,7 +165,21 @@ static bool opts_valid(void)
 		return false;
 	}
 
-	if (opts.hack_jump_label	||
+#ifndef BUILD_KLP
+	if (opts.checksum) {
+		ERROR("--checksum not supported; install xxhash-devel/libxxhash-dev (version >= 0.8) and recompile");
+		return false;
+	}
+#endif
+
+	if (opts.debug_checksum && !opts.checksum) {
+		ERROR("--debug-checksum requires --checksum");
+		return false;
+	}
+
+	if (opts.checksum		||
+	    opts.disas			||
+	    opts.hack_jump_label	||
 	    opts.hack_noinstr		||
 	    opts.ibt			||
 	    opts.mcount			||
@@ -243,15 +263,12 @@ static void save_argv(int argc, const char **argv)
 			ERROR_GLIBC("strdup(%s)", argv[i]);
 			exit(1);
 		}
-	};
+	}
 }
 
-void print_args(void)
+int make_backup(void)
 {
-	char *backup = NULL;
-
-	if (opts.output || opts.dryrun)
-		goto print;
+	char *backup;
 
 	/*
 	 * Make a backup before kbuild deletes the file so the error
@@ -260,33 +277,32 @@ void print_args(void)
 	backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1);
 	if (!backup) {
 		ERROR_GLIBC("malloc");
-		goto print;
+		return 1;
 	}
 
 	strcpy(backup, objname);
 	strcat(backup, ORIG_SUFFIX);
-	if (copy_file(objname, backup)) {
-		backup = NULL;
-		goto print;
-	}
+	if (copy_file(objname, backup))
+		return 1;
 
-print:
 	/*
-	 * Print the cmdline args to make it easier to recreate.  If '--output'
-	 * wasn't used, add it to the printed args with the backup as input.
+	 * Print the cmdline args to make it easier to recreate.
 	 */
+
 	fprintf(stderr, "%s", orig_argv[0]);
 
 	for (int i = 1; i < orig_argc; i++) {
 		char *arg = orig_argv[i];
 
-		if (backup && !strcmp(arg, objname))
+		/* Modify the printed args to use the backup */
+		if (!opts.output && !strcmp(arg, objname))
 			fprintf(stderr, " %s -o %s", backup, objname);
 		else
 			fprintf(stderr, " %s", arg);
 	}
 
 	fprintf(stderr, "\n");
+	return 0;
 }
 
 int objtool_run(int argc, const char **argv)
@@ -332,5 +348,5 @@ int objtool_run(int argc, const char **argv)
 	if (!opts.dryrun && file->elf->changed && elf_write(file->elf))
 		return 1;
 
-	return 0;
+	return elf_close(file->elf);
 }
diff --git a/tools/objtool/builtin-klp.c b/tools/objtool/builtin-klp.c
new file mode 100644
index 000000000000..56d5a5b92f72
--- /dev/null
+++ b/tools/objtool/builtin-klp.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <subcmd/parse-options.h>
+#include <string.h>
+#include <stdlib.h>
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
+#include <objtool/klp.h>
+
+struct subcmd {
+	const char *name;		/* subcommand word, compared against argv[0] by cmd_klp() */
+	const char *description;	/* one-line summary printed by cmd_klp_usage() */
+	int (*fn)(int, const char **);	/* handler, called with the shifted (argc, argv) */
+};
+
+static struct subcmd subcmds[] = {	/* searched in order by cmd_klp() */
+	{ "diff",		"Generate binary diff of two object files",		cmd_klp_diff, },
+	{ "post-link",		"Finalize klp symbols/relocs after module linking",	cmd_klp_post_link, },
+};
+
+/* Print the klp usage text and the subcommand list to stderr, then exit(1). */
+static void cmd_klp_usage(void)
+{
+	const struct subcmd *end = subcmds + ARRAY_SIZE(subcmds);
+
+	fputs("usage: objtool klp <subcommand> [<options>]\n\n", stderr);
+	fputs("Subcommands:\n", stderr);
+
+	for (const struct subcmd *sc = subcmds; sc < end; sc++)
+		fprintf(stderr, "  %s\t%s\n", sc->name, sc->description);
+
+	exit(1);
+}
+
+/* Run the klp subcommand named by argv[1]; bad input prints usage and exits. */
+int cmd_klp(int argc, const char **argv)
+{
+	/* Shift past the first word (presumably "klp") so argv[0] is the subcommand */
+	argc--;
+	argv++;
+
+	if (argc <= 0)
+		cmd_klp_usage();
+
+	for (int i = 0; i < ARRAY_SIZE(subcmds); i++) {
+		struct subcmd *cmd = &subcmds[i];
+
+		if (!strcmp(cmd->name, argv[0]))
+			return cmd->fn(argc, argv);
+	}
+
+	cmd_klp_usage();
+	return 0;
+}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index a5770570b106..9ec0e07cce90 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3,6 +3,8 @@
  * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
  */
 
+#define _GNU_SOURCE /* memmem() */
+#include <fnmatch.h>
 #include <string.h>
 #include <stdlib.h>
 #include <inttypes.h>
@@ -11,10 +13,13 @@
 #include <objtool/builtin.h>
 #include <objtool/cfi.h>
 #include <objtool/arch.h>
+#include <objtool/disas.h>
 #include <objtool/check.h>
 #include <objtool/special.h>
+#include <objtool/trace.h>
 #include <objtool/warn.h>
-#include <objtool/endianness.h>
+#include <objtool/checksum.h>
+#include <objtool/util.h>
 
 #include <linux/objtool_types.h>
 #include <linux/hashtable.h>
@@ -22,11 +27,6 @@
 #include <linux/static_call_types.h>
 #include <linux/string.h>
 
-struct alternative {
-	struct alternative *next;
-	struct instruction *insn;
-};
-
 static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache;
 
 static struct cfi_init_state initial_func_cfi;
@@ -34,6 +34,10 @@ static struct cfi_state init_cfi;
 static struct cfi_state func_cfi;
 static struct cfi_state force_undefined_cfi;
 
+struct disas_context *objtool_disas_ctx;
+
+size_t sym_name_max_len;
+
 struct instruction *find_insn(struct objtool_file *file,
 			      struct section *sec, unsigned long offset)
 {
@@ -106,7 +110,7 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
 #define for_each_insn(file, insn)					\
 	for (struct section *__sec, *__fake = (struct section *)1;	\
 	     __fake; __fake = NULL)					\
-		for_each_sec(file, __sec)				\
+		for_each_sec(file->elf, __sec)				\
 			sec_for_each_insn(file, __sec, insn)
 
 #define func_for_each_insn(file, func, insn)				\
@@ -131,15 +135,6 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
 	for (insn = next_insn_same_sec(file, insn); insn;		\
 	     insn = next_insn_same_sec(file, insn))
 
-static inline struct symbol *insn_call_dest(struct instruction *insn)
-{
-	if (insn->type == INSN_JUMP_DYNAMIC ||
-	    insn->type == INSN_CALL_DYNAMIC)
-		return NULL;
-
-	return insn->_call_dest;
-}
-
 static inline struct reloc *insn_jump_table(struct instruction *insn)
 {
 	if (insn->type == INSN_JUMP_DYNAMIC ||
@@ -186,20 +181,6 @@ static bool is_sibling_call(struct instruction *insn)
 }
 
 /*
- * Checks if a string ends with another.
- */
-static bool str_ends_with(const char *s, const char *sub)
-{
-	const int slen = strlen(s);
-	const int sublen = strlen(sub);
-
-	if (sublen > slen)
-		return 0;
-
-	return !memcmp(s + slen - sublen, sub, sublen);
-}
-
-/*
  * Checks if a function is a Rust "noreturn" one.
  */
 static bool is_rust_noreturn(const struct symbol *func)
@@ -217,6 +198,7 @@ static bool is_rust_noreturn(const struct symbol *func)
 	 * these come from the Rust standard library).
 	 */
 	return str_ends_with(func->name, "_4core5sliceSp15copy_from_slice17len_mismatch_fail")		||
+	       str_ends_with(func->name, "_4core6option13expect_failed")				||
 	       str_ends_with(func->name, "_4core6option13unwrap_failed")				||
 	       str_ends_with(func->name, "_4core6result13unwrap_failed")				||
 	       str_ends_with(func->name, "_4core9panicking5panic")					||
@@ -261,7 +243,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
 	if (!func)
 		return false;
 
-	if (func->bind == STB_GLOBAL || func->bind == STB_WEAK) {
+	if (!is_local_sym(func)) {
 		if (is_rust_noreturn(func))
 			return true;
 
@@ -270,7 +252,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
 				return true;
 	}
 
-	if (func->bind == STB_WEAK)
+	if (is_weak_sym(func))
 		return false;
 
 	if (!func->len)
@@ -430,14 +412,13 @@ static int decode_instructions(struct objtool_file *file)
 	struct symbol *func;
 	unsigned long offset;
 	struct instruction *insn;
-	int ret;
 
-	for_each_sec(file, sec) {
+	for_each_sec(file->elf, sec) {
 		struct instruction *insns = NULL;
 		u8 prev_len = 0;
 		u8 idx = 0;
 
-		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+		if (!is_text_sec(sec))
 			continue;
 
 		if (strcmp(sec->name, ".altinstr_replacement") &&
@@ -460,9 +441,9 @@ static int decode_instructions(struct objtool_file *file)
 		if (!strcmp(sec->name, ".init.text") && !opts.module)
 			sec->init = true;
 
-		for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
+		for (offset = 0; offset < sec_size(sec); offset += insn->len) {
 			if (!insns || idx == INSN_CHUNK_MAX) {
-				insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE);
+				insns = calloc(INSN_CHUNK_SIZE, sizeof(*insn));
 				if (!insns) {
 					ERROR_GLIBC("calloc");
 					return -1;
@@ -479,11 +460,8 @@ static int decode_instructions(struct objtool_file *file)
 			insn->offset = offset;
 			insn->prev_len = prev_len;
 
-			ret = arch_decode_instruction(file, sec, offset,
-						      sec->sh.sh_size - offset,
-						      insn);
-			if (ret)
-				return ret;
+			if (arch_decode_instruction(file, sec, offset, sec_size(sec) - offset, insn))
+				return -1;
 
 			prev_len = insn->len;
 
@@ -500,12 +478,12 @@ static int decode_instructions(struct objtool_file *file)
 		}
 
 		sec_for_each_sym(sec, func) {
-			if (func->type != STT_NOTYPE && func->type != STT_FUNC)
+			if (!is_notype_sym(func) && !is_func_sym(func))
 				continue;
 
-			if (func->offset == sec->sh.sh_size) {
+			if (func->offset == sec_size(sec)) {
 				/* Heuristic: likely an "end" symbol */
-				if (func->type == STT_NOTYPE)
+				if (is_notype_sym(func))
 					continue;
 				ERROR("%s(): STT_FUNC at end of section", func->name);
 				return -1;
@@ -521,7 +499,7 @@ static int decode_instructions(struct objtool_file *file)
 
 			sym_for_each_insn(file, func, insn) {
 				insn->sym = func;
-				if (func->type == STT_FUNC &&
+				if (is_func_sym(func) &&
 				    insn->type == INSN_ENDBR &&
 				    list_empty(&insn->call_node)) {
 					if (insn->offset == func->offset) {
@@ -565,7 +543,7 @@ static int add_pv_ops(struct objtool_file *file, const char *symname)
 		idx = (reloc_offset(reloc) - sym->offset) / sizeof(unsigned long);
 
 		func = reloc->sym;
-		if (func->type == STT_SECTION)
+		if (is_sec_sym(func))
 			func = find_symbol_by_offset(reloc->sym->sec,
 						     reloc_addend(reloc));
 		if (!func) {
@@ -599,7 +577,7 @@ static int init_pv_ops(struct objtool_file *file)
 	};
 	const char *pv_ops;
 	struct symbol *sym;
-	int idx, nr, ret;
+	int idx, nr;
 
 	if (!opts.noinstr)
 		return 0;
@@ -611,7 +589,7 @@ static int init_pv_ops(struct objtool_file *file)
 		return 0;
 
 	nr = sym->len / sizeof(unsigned long);
-	file->pv_ops = calloc(sizeof(struct pv_state), nr);
+	file->pv_ops = calloc(nr, sizeof(struct pv_state));
 	if (!file->pv_ops) {
 		ERROR_GLIBC("calloc");
 		return -1;
@@ -621,14 +599,27 @@ static int init_pv_ops(struct objtool_file *file)
 		INIT_LIST_HEAD(&file->pv_ops[idx].targets);
 
 	for (idx = 0; (pv_ops = pv_ops_tables[idx]); idx++) {
-		ret = add_pv_ops(file, pv_ops);
-		if (ret)
-			return ret;
+		if (add_pv_ops(file, pv_ops))
+			return -1;
 	}
 
 	return 0;
 }
 
+/* Report whether a module's .modinfo section contains a "livepatch=Y" tag. */
+static bool is_livepatch_module(struct objtool_file *file)
+{
+	/* The pattern begins with a NUL byte, so a match must follow a terminator */
+	static const char tag[] = "\0livepatch=Y";
+	struct section *modinfo;
+
+	if (!opts.module)
+		return false;
+
+	modinfo = find_section_by_name(file->elf, ".modinfo");
+	return modinfo && memmem(modinfo->data->d_buf, sec_size(modinfo), tag, sizeof(tag) - 1);
+}
+
 static int create_static_call_sections(struct objtool_file *file)
 {
 	struct static_call_site *site;
@@ -640,8 +631,14 @@ static int create_static_call_sections(struct objtool_file *file)
 
 	sec = find_section_by_name(file->elf, ".static_call_sites");
 	if (sec) {
-		INIT_LIST_HEAD(&file->static_call_list);
-		WARN("file already has .static_call_sites section, skipping");
+		/*
+		 * Livepatch modules may have already extracted the static call
+		 * site entries to take advantage of vmlinux static call
+		 * privileges.
+		 */
+		if (!file->klp)
+			WARN("file already has .static_call_sites section, skipping");
+
 		return 0;
 	}
 
@@ -685,7 +682,7 @@ static int create_static_call_sections(struct objtool_file *file)
 
 		key_sym = find_symbol_by_name(file->elf, tmp);
 		if (!key_sym) {
-			if (!opts.module) {
+			if (!opts.module || file->klp) {
 				ERROR("static_call: can't find static_call_key symbol: %s", tmp);
 				return -1;
 			}
@@ -828,7 +825,7 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
 		struct symbol *sym = insn->sym;
 		*site = 0;
 
-		if (opts.module && sym && sym->type == STT_FUNC &&
+		if (opts.module && sym && is_func_sym(sym) &&
 		    insn->offset == sym->offset &&
 		    (!strcmp(sym->name, "init_module") ||
 		     !strcmp(sym->name, "cleanup_module"))) {
@@ -856,14 +853,13 @@ static int create_cfi_sections(struct objtool_file *file)
 
 	sec = find_section_by_name(file->elf, ".cfi_sites");
 	if (sec) {
-		INIT_LIST_HEAD(&file->call_list);
 		WARN("file already has .cfi_sites section, skipping");
 		return 0;
 	}
 
 	idx = 0;
-	for_each_sym(file, sym) {
-		if (sym->type != STT_FUNC)
+	for_each_sym(file->elf, sym) {
+		if (!is_func_sym(sym))
 			continue;
 
 		if (strncmp(sym->name, "__cfi_", 6))
@@ -878,8 +874,8 @@ static int create_cfi_sections(struct objtool_file *file)
 		return -1;
 
 	idx = 0;
-	for_each_sym(file, sym) {
-		if (sym->type != STT_FUNC)
+	for_each_sym(file->elf, sym) {
+		if (!is_func_sym(sym))
 			continue;
 
 		if (strncmp(sym->name, "__cfi_", 6))
@@ -905,8 +901,13 @@ static int create_mcount_loc_sections(struct objtool_file *file)
 
 	sec = find_section_by_name(file->elf, "__mcount_loc");
 	if (sec) {
-		INIT_LIST_HEAD(&file->mcount_loc_list);
-		WARN("file already has __mcount_loc section, skipping");
+		/*
+		 * Livepatch modules have already extracted their __mcount_loc
+		 * entries to cover the !CONFIG_FTRACE_MCOUNT_USE_OBJTOOL case.
+		 */
+		if (!file->klp)
+			WARN("file already has __mcount_loc section, skipping");
+
 		return 0;
 	}
 
@@ -950,7 +951,6 @@ static int create_direct_call_sections(struct objtool_file *file)
 
 	sec = find_section_by_name(file->elf, ".call_sites");
 	if (sec) {
-		INIT_LIST_HEAD(&file->call_list);
 		WARN("file already has .call_sites section, skipping");
 		return 0;
 	}
@@ -981,6 +981,59 @@ static int create_direct_call_sections(struct objtool_file *file)
 	return 0;
 }
 
+#ifdef BUILD_KLP /* real implementation: builds the .discard.sym_checksum section */
+static int create_sym_checksum_section(struct objtool_file *file)
+{
+	struct section *sec;
+	struct symbol *sym;
+	unsigned int idx = 0;		/* symbol count first, then entry index */
+	struct sym_checksum *checksum;
+	size_t entsize = sizeof(struct sym_checksum);
+
+	sec = find_section_by_name(file->elf, ".discard.sym_checksum");
+	if (sec) {
+		if (!opts.dryrun)
+			WARN("file already has .discard.sym_checksum section, skipping");
+
+		return 0;	/* an existing section is left untouched */
+	}
+
+	for_each_sym(file->elf, sym)	/* pass 1: count symbols with a checksum set */
+		if (sym->csum.checksum)
+			idx++;
+
+	if (!idx)
+		return 0;	/* no checksummed symbols: create nothing */
+
+	sec = elf_create_section_pair(file->elf, ".discard.sym_checksum", entsize,
+				      idx, idx);
+	if (!sec)
+		return -1;
+
+	idx = 0;	/* pass 2: one entry and one reloc per counted symbol */
+	for_each_sym(file->elf, sym) {
+		if (!sym->csum.checksum)
+			continue;
+
+		if (!elf_init_reloc(file->elf, sec->rsec, idx, idx * entsize,
+				    sym, 0, R_TEXT64))
+			return -1;
+
+		checksum = (struct sym_checksum *)sec->data->d_buf + idx;
+		checksum->addr = 0; /* reloc */
+		checksum->checksum = sym->csum.checksum;
+
+		mark_sec_changed(file->elf, sec, true);
+
+		idx++;
+	}
+
+	return 0;
+}
+#else /* !BUILD_KLP */
+static int create_sym_checksum_section(struct objtool_file *file) { return -EINVAL; }
+#endif /* BUILD_KLP */
+
 /*
  * Warnings shouldn't be reported for ignored functions.
  */
@@ -1432,9 +1485,14 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn,
 }
 
 static bool is_first_func_insn(struct objtool_file *file,
-			       struct instruction *insn, struct symbol *sym)
+			       struct instruction *insn)
 {
-	if (insn->offset == sym->offset)
+	struct symbol *func = insn_func(insn);
+
+	if (!func)
+		return false;
+
+	if (insn->offset == func->offset)
 		return true;
 
 	/* Allow direct CALL/JMP past ENDBR */
@@ -1442,7 +1500,7 @@ static bool is_first_func_insn(struct objtool_file *file,
 		struct instruction *prev = prev_insn_same_sym(file, insn);
 
 		if (prev && prev->type == INSN_ENDBR &&
-		    insn->offset == sym->offset + prev->len)
+		    insn->offset == func->offset + prev->len)
 			return true;
 	}
 
@@ -1450,44 +1508,22 @@ static bool is_first_func_insn(struct objtool_file *file,
 }
 
 /*
- * A sibling call is a tail-call to another symbol -- to differentiate from a
- * recursive tail-call which is to the same symbol.
- */
-static bool jump_is_sibling_call(struct objtool_file *file,
-				 struct instruction *from, struct instruction *to)
-{
-	struct symbol *fs = from->sym;
-	struct symbol *ts = to->sym;
-
-	/* Not a sibling call if from/to a symbol hole */
-	if (!fs || !ts)
-		return false;
-
-	/* Not a sibling call if not targeting the start of a symbol. */
-	if (!is_first_func_insn(file, to, ts))
-		return false;
-
-	/* Disallow sibling calls into STT_NOTYPE */
-	if (ts->type == STT_NOTYPE)
-		return false;
-
-	/* Must not be self to be a sibling */
-	return fs->pfunc != ts->pfunc;
-}
-
-/*
  * Find the destination instructions for all jumps.
  */
 static int add_jump_destinations(struct objtool_file *file)
 {
-	struct instruction *insn, *jump_dest;
+	struct instruction *insn;
 	struct reloc *reloc;
-	struct section *dest_sec;
-	unsigned long dest_off;
-	int ret;
 
 	for_each_insn(file, insn) {
 		struct symbol *func = insn_func(insn);
+		struct instruction *dest_insn;
+		struct section *dest_sec;
+		struct symbol *dest_sym;
+		unsigned long dest_off;
+
+		if (!is_static_jump(insn))
+			continue;
 
 		if (insn->jump_dest) {
 			/*
@@ -1496,53 +1532,53 @@ static int add_jump_destinations(struct objtool_file *file)
 			 */
 			continue;
 		}
-		if (!is_static_jump(insn))
-			continue;
 
 		reloc = insn_reloc(file, insn);
 		if (!reloc) {
 			dest_sec = insn->sec;
 			dest_off = arch_jump_destination(insn);
-		} else if (reloc->sym->type == STT_SECTION) {
-			dest_sec = reloc->sym->sec;
-			dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
-		} else if (reloc->sym->retpoline_thunk) {
-			ret = add_retpoline_call(file, insn);
-			if (ret)
-				return ret;
-			continue;
-		} else if (reloc->sym->return_thunk) {
-			add_return_call(file, insn, true);
-			continue;
-		} else if (func) {
-			/*
-			 * External sibling call or internal sibling call with
-			 * STT_FUNC reloc.
-			 */
-			ret = add_call_dest(file, insn, reloc->sym, true);
-			if (ret)
-				return ret;
-			continue;
-		} else if (reloc->sym->sec->idx) {
-			dest_sec = reloc->sym->sec;
-			dest_off = reloc->sym->sym.st_value +
-				   arch_dest_reloc_offset(reloc_addend(reloc));
+			dest_sym = dest_sec->sym;
 		} else {
-			/* non-func asm code jumping to another file */
-			continue;
+			dest_sym = reloc->sym;
+			if (is_undef_sym(dest_sym)) {
+				if (dest_sym->retpoline_thunk) {
+					if (add_retpoline_call(file, insn))
+						return -1;
+					continue;
+				}
+
+				if (dest_sym->return_thunk) {
+					add_return_call(file, insn, true);
+					continue;
+				}
+
+				/* External symbol */
+				if (func) {
+					/* External sibling call */
+					if (add_call_dest(file, insn, dest_sym, true))
+						return -1;
+					continue;
+				}
+
+				/* Non-func asm code jumping to external symbol */
+				continue;
+			}
+
+			dest_sec = dest_sym->sec;
+			dest_off = dest_sym->offset + arch_insn_adjusted_addend(insn, reloc);
 		}
 
-		jump_dest = find_insn(file, dest_sec, dest_off);
-		if (!jump_dest) {
+		dest_insn = find_insn(file, dest_sec, dest_off);
+		if (!dest_insn) {
 			struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
 
 			/*
-			 * This is a special case for retbleed_untrain_ret().
-			 * It jumps to __x86_return_thunk(), but objtool
-			 * can't find the thunk's starting RET
-			 * instruction, because the RET is also in the
-			 * middle of another instruction.  Objtool only
-			 * knows about the outer instruction.
+			 * retbleed_untrain_ret() jumps to
+			 * __x86_return_thunk(), but objtool can't find
+			 * the thunk's starting RET instruction,
+			 * because the RET is also in the middle of
+			 * another instruction.  Objtool only knows
+			 * about the outer instruction.
 			 */
 			if (sym && sym->embedded_insn) {
 				add_return_call(file, insn, false);
@@ -1550,76 +1586,52 @@ static int add_jump_destinations(struct objtool_file *file)
 			}
 
 			/*
-			 * GCOV/KCOV dead code can jump to the end of the
-			 * function/section.
+			 * GCOV/KCOV dead code can jump to the end of
+			 * the function/section.
 			 */
 			if (file->ignore_unreachables && func &&
 			    dest_sec == insn->sec &&
 			    dest_off == func->offset + func->len)
 				continue;
 
-			ERROR_INSN(insn, "can't find jump dest instruction at %s+0x%lx",
-				   dest_sec->name, dest_off);
+			ERROR_INSN(insn, "can't find jump dest instruction at %s",
+				   offstr(dest_sec, dest_off));
 			return -1;
 		}
 
-		/*
-		 * An intra-TU jump in retpoline.o might not have a relocation
-		 * for its jump dest, in which case the above
-		 * add_{retpoline,return}_call() didn't happen.
-		 */
-		if (jump_dest->sym && jump_dest->offset == jump_dest->sym->offset) {
-			if (jump_dest->sym->retpoline_thunk) {
-				ret = add_retpoline_call(file, insn);
-				if (ret)
-					return ret;
-				continue;
-			}
-			if (jump_dest->sym->return_thunk) {
-				add_return_call(file, insn, true);
-				continue;
-			}
+		if (!dest_sym || is_sec_sym(dest_sym)) {
+			dest_sym = dest_insn->sym;
+			if (!dest_sym)
+				goto set_jump_dest;
 		}
 
-		/*
-		 * Cross-function jump.
-		 */
-		if (func && insn_func(jump_dest) && func != insn_func(jump_dest)) {
+		if (dest_sym->retpoline_thunk && dest_insn->offset == dest_sym->offset) {
+			if (add_retpoline_call(file, insn))
+				return -1;
+			continue;
+		}
 
-			/*
-			 * For GCC 8+, create parent/child links for any cold
-			 * subfunctions.  This is _mostly_ redundant with a
-			 * similar initialization in read_symbols().
-			 *
-			 * If a function has aliases, we want the *first* such
-			 * function in the symbol table to be the subfunction's
-			 * parent.  In that case we overwrite the
-			 * initialization done in read_symbols().
-			 *
-			 * However this code can't completely replace the
-			 * read_symbols() code because this doesn't detect the
-			 * case where the parent function's only reference to a
-			 * subfunction is through a jump table.
-			 */
-			if (!strstr(func->name, ".cold") &&
-			    strstr(insn_func(jump_dest)->name, ".cold")) {
-				func->cfunc = insn_func(jump_dest);
-				insn_func(jump_dest)->pfunc = func;
-			}
+		if (dest_sym->return_thunk && dest_insn->offset == dest_sym->offset) {
+			add_return_call(file, insn, true);
+			continue;
 		}
 
-		if (jump_is_sibling_call(file, insn, jump_dest)) {
-			/*
-			 * Internal sibling call without reloc or with
-			 * STT_SECTION reloc.
-			 */
-			ret = add_call_dest(file, insn, insn_func(jump_dest), true);
-			if (ret)
-				return ret;
+		if (!insn->sym || insn->sym->pfunc == dest_sym->pfunc)
+			goto set_jump_dest;
+
+		/*
+		 * Internal cross-function jump.
+		 */
+
+		if (is_first_func_insn(file, dest_insn)) {
+			/* Internal sibling call */
+			if (add_call_dest(file, insn, dest_sym, true))
+				return -1;
 			continue;
 		}
 
-		insn->jump_dest = jump_dest;
+set_jump_dest:
+		insn->jump_dest = dest_insn;
 	}
 
 	return 0;
@@ -1645,7 +1657,6 @@ static int add_call_destinations(struct objtool_file *file)
 	unsigned long dest_off;
 	struct symbol *dest;
 	struct reloc *reloc;
-	int ret;
 
 	for_each_insn(file, insn) {
 		struct symbol *func = insn_func(insn);
@@ -1657,9 +1668,8 @@ static int add_call_destinations(struct objtool_file *file)
 			dest_off = arch_jump_destination(insn);
 			dest = find_call_destination(insn->sec, dest_off);
 
-			ret = add_call_dest(file, insn, dest, false);
-			if (ret)
-				return ret;
+			if (add_call_dest(file, insn, dest, false))
+				return -1;
 
 			if (func && func->ignore)
 				continue;
@@ -1669,13 +1679,13 @@ static int add_call_destinations(struct objtool_file *file)
 				return -1;
 			}
 
-			if (func && insn_call_dest(insn)->type != STT_FUNC) {
+			if (func && !is_func_sym(insn_call_dest(insn))) {
 				ERROR_INSN(insn, "unsupported call to non-function");
 				return -1;
 			}
 
-		} else if (reloc->sym->type == STT_SECTION) {
-			dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
+		} else if (is_sec_sym(reloc->sym)) {
+			dest_off = arch_insn_adjusted_addend(insn, reloc);
 			dest = find_call_destination(reloc->sym->sec, dest_off);
 			if (!dest) {
 				ERROR_INSN(insn, "can't find call dest symbol at %s+0x%lx",
@@ -1683,19 +1693,16 @@ static int add_call_destinations(struct objtool_file *file)
 				return -1;
 			}
 
-			ret = add_call_dest(file, insn, dest, false);
-			if (ret)
-				return ret;
+			if (add_call_dest(file, insn, dest, false))
+				return -1;
 
 		} else if (reloc->sym->retpoline_thunk) {
-			ret = add_retpoline_call(file, insn);
-			if (ret)
-				return ret;
+			if (add_retpoline_call(file, insn))
+				return -1;
 
 		} else {
-			ret = add_call_dest(file, insn, reloc->sym, false);
-			if (ret)
-				return ret;
+			if (add_call_dest(file, insn, reloc->sym, false))
+				return -1;
 		}
 	}
 
@@ -1744,6 +1751,7 @@ static int handle_group_alt(struct objtool_file *file,
 		orig_alt_group->last_insn = last_orig_insn;
 		orig_alt_group->nop = NULL;
 		orig_alt_group->ignore = orig_insn->ignore_alts;
+		orig_alt_group->feature = 0;
 	} else {
 		if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
 		    orig_alt_group->first_insn->offset != special_alt->orig_len) {
@@ -1783,6 +1791,7 @@ static int handle_group_alt(struct objtool_file *file,
 		nop->type = INSN_NOP;
 		nop->sym = orig_insn->sym;
 		nop->alt_group = new_alt_group;
+		nop->fake = 1;
 	}
 
 	if (!special_alt->new_len) {
@@ -1847,6 +1856,7 @@ end:
 	new_alt_group->nop = nop;
 	new_alt_group->ignore = (*new_insn)->ignore_alts;
 	new_alt_group->cfi = orig_alt_group->cfi;
+	new_alt_group->feature = special_alt->feature;
 	return 0;
 }
 
@@ -1911,8 +1921,9 @@ static int add_special_section_alts(struct objtool_file *file)
 	struct list_head special_alts;
 	struct instruction *orig_insn, *new_insn;
 	struct special_alt *special_alt, *tmp;
+	enum alternative_type alt_type;
 	struct alternative *alt;
-	int ret;
+	struct alternative *a;
 
 	if (special_get_alts(file->elf, &special_alts))
 		return -1;
@@ -1944,16 +1955,18 @@ static int add_special_section_alts(struct objtool_file *file)
 				continue;
 			}
 
-			ret = handle_group_alt(file, special_alt, orig_insn,
-					       &new_insn);
-			if (ret)
-				return ret;
+			if (handle_group_alt(file, special_alt, orig_insn, &new_insn))
+				return -1;
+
+			alt_type = ALT_TYPE_INSTRUCTIONS;
 
 		} else if (special_alt->jump_or_nop) {
-			ret = handle_jump_alt(file, special_alt, orig_insn,
-					      &new_insn);
-			if (ret)
-				return ret;
+			if (handle_jump_alt(file, special_alt, orig_insn, &new_insn))
+				return -1;
+
+			alt_type = ALT_TYPE_JUMP_TABLE;
+		} else {
+			alt_type = ALT_TYPE_EX_TABLE;
 		}
 
 		alt = calloc(1, sizeof(*alt));
@@ -1963,8 +1976,20 @@ static int add_special_section_alts(struct objtool_file *file)
 		}
 
 		alt->insn = new_insn;
-		alt->next = orig_insn->alts;
-		orig_insn->alts = alt;
+		alt->type = alt_type;
+		alt->next = NULL;
+
+		/*
+		 * Store alternatives in the same order they have been
+		 * defined.
+		 */
+		if (!orig_insn->alts) {
+			orig_insn->alts = alt;
+		} else {
+			for (a = orig_insn->alts; a->next; a = a->next)
+				;
+			a->next = alt;
+		}
 
 		list_del(&special_alt->list);
 		free(special_alt);
@@ -2141,15 +2166,13 @@ static int add_func_jump_tables(struct objtool_file *file,
 				  struct symbol *func)
 {
 	struct instruction *insn;
-	int ret;
 
 	func_for_each_insn(file, func, insn) {
 		if (!insn_jump_table(insn))
 			continue;
 
-		ret = add_jump_table(file, insn);
-		if (ret)
-			return ret;
+		if (add_jump_table(file, insn))
+			return -1;
 	}
 
 	return 0;
@@ -2163,19 +2186,17 @@ static int add_func_jump_tables(struct objtool_file *file,
 static int add_jump_table_alts(struct objtool_file *file)
 {
 	struct symbol *func;
-	int ret;
 
 	if (!file->rodata)
 		return 0;
 
-	for_each_sym(file, func) {
-		if (func->type != STT_FUNC)
+	for_each_sym(file->elf, func) {
+		if (!is_func_sym(func) || func->alias != func)
 			continue;
 
 		mark_func_jump_tables(file, func);
-		ret = add_func_jump_tables(file, func);
-		if (ret)
-			return ret;
+		if (add_func_jump_tables(file, func))
+			return -1;
 	}
 
 	return 0;
@@ -2209,14 +2230,14 @@ static int read_unwind_hints(struct objtool_file *file)
 		return -1;
 	}
 
-	if (sec->sh.sh_size % sizeof(struct unwind_hint)) {
+	if (sec_size(sec) % sizeof(struct unwind_hint)) {
 		ERROR("struct unwind_hint size mismatch");
 		return -1;
 	}
 
 	file->hints = true;
 
-	for (i = 0; i < sec->sh.sh_size / sizeof(struct unwind_hint); i++) {
+	for (i = 0; i < sec_size(sec) / sizeof(struct unwind_hint); i++) {
 		hint = (struct unwind_hint *)sec->data->d_buf + i;
 
 		reloc = find_reloc_by_dest(file->elf, sec, i * sizeof(*hint));
@@ -2225,14 +2246,7 @@ static int read_unwind_hints(struct objtool_file *file)
 			return -1;
 		}
 
-		if (reloc->sym->type == STT_SECTION) {
-			offset = reloc_addend(reloc);
-		} else if (reloc->sym->local_label) {
-			offset = reloc->sym->offset;
-		} else {
-			ERROR("unexpected relocation symbol type in %s", sec->rsec->name);
-			return -1;
-		}
+		offset = reloc->sym->offset + reloc_addend(reloc);
 
 		insn = find_insn(file, reloc->sym->sec, offset);
 		if (!insn) {
@@ -2261,7 +2275,7 @@ static int read_unwind_hints(struct objtool_file *file)
 		if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
 			struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
 
-			if (sym && sym->bind == STB_GLOBAL) {
+			if (sym && is_global_sym(sym)) {
 				if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) {
 					ERROR_INSN(insn, "UNWIND_HINT_IRET_REGS without ENDBR");
 					return -1;
@@ -2299,7 +2313,7 @@ static int read_annotate(struct objtool_file *file,
 	struct instruction *insn;
 	struct reloc *reloc;
 	uint64_t offset;
-	int type, ret;
+	int type;
 
 	sec = find_section_by_name(file->elf, ".discard.annotate_insn");
 	if (!sec)
@@ -2317,10 +2331,13 @@ static int read_annotate(struct objtool_file *file,
 		sec->sh.sh_entsize = 8;
 	}
 
-	for_each_reloc(sec->rsec, reloc) {
-		type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4);
-		type = bswap_if_needed(file->elf, type);
+	if (sec_num_entries(sec) != sec_num_entries(sec->rsec)) {
+		ERROR("bad .discard.annotate_insn section: missing relocs");
+		return -1;
+	}
 
+	for_each_reloc(sec->rsec, reloc) {
+		type = annotype(file->elf, sec, reloc);
 		offset = reloc->sym->offset + reloc_addend(reloc);
 		insn = find_insn(file, reloc->sym->sec, offset);
 
@@ -2329,9 +2346,8 @@ static int read_annotate(struct objtool_file *file,
 			return -1;
 		}
 
-		ret = func(file, type, insn);
-		if (ret < 0)
-			return ret;
+		if (func(file, type, insn))
+			return -1;
 	}
 
 	return 0;
@@ -2470,12 +2486,13 @@ static bool is_profiling_func(const char *name)
 static int classify_symbols(struct objtool_file *file)
 {
 	struct symbol *func;
+	size_t len;
 
-	for_each_sym(file, func) {
-		if (func->type == STT_NOTYPE && strstarts(func->name, ".L"))
+	for_each_sym(file->elf, func) {
+		if (is_notype_sym(func) && strstarts(func->name, ".L"))
 			func->local_label = true;
 
-		if (func->bind != STB_GLOBAL)
+		if (!is_global_sym(func))
 			continue;
 
 		if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
@@ -2496,6 +2513,10 @@ static int classify_symbols(struct objtool_file *file)
 
 		if (is_profiling_func(func->name))
 			func->profiling_func = true;
+
+		len = strlen(func->name);
+		if (len > sym_name_max_len)
+			sym_name_max_len = len;
 	}
 
 	return 0;
@@ -2516,7 +2537,7 @@ static void mark_rodata(struct objtool_file *file)
 	 *
 	 * .rodata.str1.* sections are ignored; they don't contain jump tables.
 	 */
-	for_each_sec(file, sec) {
+	for_each_sec(file->elf, sec) {
 		if ((!strncmp(sec->name, ".rodata", 7) &&
 		     !strstr(sec->name, ".str1.")) ||
 		    !strncmp(sec->name, ".data.rel.ro", 12)) {
@@ -2528,78 +2549,115 @@ static void mark_rodata(struct objtool_file *file)
 	file->rodata = found;
 }
 
+/* With opts.link set, flag instructions that lie in symbol holes (insn->hole). */
+static void mark_holes(struct objtool_file *file)
+{
+	bool inside_hole = false;
+	struct instruction *cur;
+
+	if (!opts.link)
+		return;
+
+	/*
+	 * Whole archive runs might encounter dead code from weak symbols:
+	 * the linker dropped the weak symbol in favour of a regular one
+	 * but left the code in place.
+	 */
+	for_each_insn(file, cur) {
+		struct symbol *target;
+
+		if (cur->sym || !find_symbol_hole_containing(cur->sec, cur->offset)) {
+			inside_hole = false;
+			continue;
+		}
+
+		/* Leading NOPs are function padding / pfx code, not a hole yet */
+		if (cur->type == INSN_NOP && !inside_hole)
+			continue;
+
+		inside_hole = true;
+		cur->hole = 1;
+
+		/* A hole that jumps into a .cold function: ignore that function */
+		if (!cur->jump_dest)
+			continue;
+		target = insn_func(cur->jump_dest);
+		if (target && target->cold)
+			target->ignore = true;
+	}
+}
+
+/* True if any of the stackval, orc, uaccess or checksum options is set. */
+static bool validate_branch_enabled(void)
+{
+	if (opts.stackval || opts.orc)
+		return true;
+	return opts.uaccess || opts.checksum;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
-	int ret;
+	file->klp = is_livepatch_module(file);
 
 	mark_rodata(file);
 
-	ret = init_pv_ops(file);
-	if (ret)
-		return ret;
+	if (init_pv_ops(file))
+		return -1;
 
 	/*
 	 * Must be before add_{jump_call}_destination.
 	 */
-	ret = classify_symbols(file);
-	if (ret)
-		return ret;
+	if (classify_symbols(file))
+		return -1;
 
-	ret = decode_instructions(file);
-	if (ret)
-		return ret;
+	if (decode_instructions(file))
+		return -1;
 
-	ret = add_ignores(file);
-	if (ret)
-		return ret;
+	if (add_ignores(file))
+		return -1;
 
 	add_uaccess_safe(file);
 
-	ret = read_annotate(file, __annotate_early);
-	if (ret)
-		return ret;
+	if (read_annotate(file, __annotate_early))
+		return -1;
 
 	/*
 	 * Must be before add_jump_destinations(), which depends on 'func'
 	 * being set for alternatives, to enable proper sibling call detection.
 	 */
-	if (opts.stackval || opts.orc || opts.uaccess || opts.noinstr) {
-		ret = add_special_section_alts(file);
-		if (ret)
-			return ret;
+	if (validate_branch_enabled() || opts.noinstr || opts.hack_jump_label || opts.disas) {
+		if (add_special_section_alts(file))
+			return -1;
 	}
 
-	ret = add_jump_destinations(file);
-	if (ret)
-		return ret;
+	if (add_jump_destinations(file))
+		return -1;
 
 	/*
 	 * Must be before add_call_destination(); it changes INSN_CALL to
 	 * INSN_JUMP.
 	 */
-	ret = read_annotate(file, __annotate_ifc);
-	if (ret)
-		return ret;
+	if (read_annotate(file, __annotate_ifc))
+		return -1;
 
-	ret = add_call_destinations(file);
-	if (ret)
-		return ret;
+	if (add_call_destinations(file))
+		return -1;
 
-	ret = add_jump_table_alts(file);
-	if (ret)
-		return ret;
+	if (add_jump_table_alts(file))
+		return -1;
 
-	ret = read_unwind_hints(file);
-	if (ret)
-		return ret;
+	if (read_unwind_hints(file))
+		return -1;
+
+	/* Must be after add_jump_destinations() */
+	mark_holes(file);
 
 	/*
 	 * Must be after add_call_destinations() such that it can override
 	 * dead_end_function() marks.
 	 */
-	ret = read_annotate(file, __annotate_late);
-	if (ret)
-		return ret;
+	if (read_annotate(file, __annotate_late))
+		return -1;
 
 	return 0;
 }
@@ -3353,7 +3411,7 @@ static bool pv_call_dest(struct objtool_file *file, struct instruction *insn)
 	if (!reloc || strcmp(reloc->sym->name, "pv_ops"))
 		return false;
 
-	idx = (arch_dest_reloc_offset(reloc_addend(reloc)) / sizeof(void *));
+	idx = arch_insn_adjusted_addend(insn, reloc) / sizeof(void *);
 
 	if (file->pv_ops[idx].clean)
 		return true;
@@ -3515,9 +3573,14 @@ static bool skip_alt_group(struct instruction *insn)
 {
 	struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL;
 
+	if (!insn->alt_group)
+		return false;
+
 	/* ANNOTATE_IGNORE_ALTERNATIVE */
-	if (insn->alt_group && insn->alt_group->ignore)
+	if (insn->alt_group->ignore) {
+		TRACE_ALT(insn, "alt group ignored");
 		return true;
+	}
 
 	/*
 	 * For NOP patched with CLAC/STAC, only follow the latter to avoid
@@ -3539,258 +3602,404 @@ static bool skip_alt_group(struct instruction *insn)
 	return alt_insn->type == INSN_CLAC || alt_insn->type == INSN_STAC;
 }
 
-/*
- * Follow the branch starting at the given instruction, and recursively follow
- * any other branches (jumps).  Meanwhile, track the frame pointer state at
- * each instruction and validate all the rules described in
- * tools/objtool/Documentation/objtool.txt.
- */
-static int validate_branch(struct objtool_file *file, struct symbol *func,
-			   struct instruction *insn, struct insn_state state)
+static int checksum_debug_init(struct objtool_file *file)
 {
-	struct alternative *alt;
-	struct instruction *next_insn, *prev_insn = NULL;
-	struct section *sec;
-	u8 visited;
-	int ret;
+	char *dup, *s;
 
-	if (func && func->ignore)
+	if (!opts.debug_checksum)
 		return 0;
 
-	sec = insn->sec;
+	dup = strdup(opts.debug_checksum);
+	if (!dup) {
+		ERROR_GLIBC("strdup");
+		return -1;
+	}
 
-	while (1) {
-		next_insn = next_insn_to_validate(file, insn);
+	s = dup;
+	while (*s) {
+		struct symbol *func;
+		char *comma;
 
-		if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
-			/* Ignore KCFI type preambles, which always fall through */
-			if (!strncmp(func->name, "__cfi_", 6) ||
-			    !strncmp(func->name, "__pfx_", 6) ||
-			    !strncmp(func->name, "__pi___cfi_", 11) ||
-			    !strncmp(func->name, "__pi___pfx_", 11))
-				return 0;
+		comma = strchr(s, ',');
+		if (comma)
+			*comma = '\0';
 
-			if (file->ignore_unreachables)
-				return 0;
+		func = find_symbol_by_name(file->elf, s);
+		if (!func || !is_func_sym(func))
+			WARN("--debug-checksum: can't find '%s'", s);
+		else
+			func->debug_checksum = 1;
 
-			WARN("%s() falls through to next function %s()",
-			     func->name, insn_func(insn)->name);
-			func->warned = 1;
+		if (!comma)
+			break;
 
-			return 1;
-		}
+		s = comma + 1;
+	}
 
-		visited = VISITED_BRANCH << state.uaccess;
-		if (insn->visited & VISITED_BRANCH_MASK) {
-			if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
-				return 1;
+	free(dup);
+	return 0;
+}
 
-			if (insn->visited & visited)
-				return 0;
-		} else {
-			nr_insns_visited++;
-		}
+static void checksum_update_insn(struct objtool_file *file, struct symbol *func,
+				 struct instruction *insn)
+{
+	struct reloc *reloc = insn_reloc(file, insn);
+	unsigned long offset;
+	struct symbol *sym;
 
-		if (state.noinstr)
-			state.instr += insn->instr;
+	if (insn->fake)
+		return;
 
-		if (insn->hint) {
-			if (insn->restore) {
-				struct instruction *save_insn, *i;
+	checksum_update(func, insn, insn->sec->data->d_buf + insn->offset, insn->len);
 
-				i = insn;
-				save_insn = NULL;
+	if (!reloc) {
+		struct symbol *call_dest = insn_call_dest(insn);
 
-				sym_for_each_insn_continue_reverse(file, func, i) {
-					if (i->save) {
-						save_insn = i;
-						break;
-					}
-				}
+		if (call_dest)
+			checksum_update(func, insn, call_dest->demangled_name,
+					strlen(call_dest->demangled_name));
+		return;
+	}
 
-				if (!save_insn) {
-					WARN_INSN(insn, "no corresponding CFI save for CFI restore");
-					return 1;
+	sym = reloc->sym;
+	offset = arch_insn_adjusted_addend(insn, reloc);
+
+	if (is_string_sec(sym->sec)) {
+		char *str;
+
+		str = sym->sec->data->d_buf + sym->offset + offset;
+		checksum_update(func, insn, str, strlen(str));
+		return;
+	}
+
+	if (is_sec_sym(sym)) {
+		sym = find_symbol_containing(reloc->sym->sec, offset);
+		if (!sym)
+			return;
+
+		offset -= sym->offset;
+	}
+
+	checksum_update(func, insn, sym->demangled_name, strlen(sym->demangled_name));
+	checksum_update(func, insn, &offset, sizeof(offset));
+}
+
+static int validate_branch(struct objtool_file *file, struct symbol *func,
+			   struct instruction *insn, struct insn_state state);
+static int do_validate_branch(struct objtool_file *file, struct symbol *func,
+			      struct instruction *insn, struct insn_state state);
+
+static int validate_insn(struct objtool_file *file, struct symbol *func,
+			 struct instruction *insn, struct insn_state *statep,
+			 struct instruction *prev_insn, struct instruction *next_insn,
+			 bool *dead_end)
+{
+	/* prev_state and alt_name are not used if there is no disassembly support */
+	struct insn_state prev_state __maybe_unused;
+	char *alt_name __maybe_unused = NULL;
+	struct alternative *alt;
+	u8 visited;
+	int ret;
+
+	/*
+	 * Any returns before the end of this function are effectively dead
+	 * ends, i.e. validate_branch() has reached the end of the branch.
+	 */
+	*dead_end = true;
+
+	visited = VISITED_BRANCH << statep->uaccess;
+	if (insn->visited & VISITED_BRANCH_MASK) {
+		if (!insn->hint && !insn_cfi_match(insn, &statep->cfi))
+			return 1;
+
+		if (insn->visited & visited) {
+			TRACE_INSN(insn, "already visited");
+			return 0;
+		}
+	} else {
+		nr_insns_visited++;
+	}
+
+	if (statep->noinstr)
+		statep->instr += insn->instr;
+
+	if (insn->hint) {
+		if (insn->restore) {
+			struct instruction *save_insn, *i;
+
+			i = insn;
+			save_insn = NULL;
+
+			sym_for_each_insn_continue_reverse(file, func, i) {
+				if (i->save) {
+					save_insn = i;
+					break;
 				}
+			}
 
-				if (!save_insn->visited) {
-					/*
-					 * If the restore hint insn is at the
-					 * beginning of a basic block and was
-					 * branched to from elsewhere, and the
-					 * save insn hasn't been visited yet,
-					 * defer following this branch for now.
-					 * It will be seen later via the
-					 * straight-line path.
-					 */
-					if (!prev_insn)
-						return 0;
+			if (!save_insn) {
+				WARN_INSN(insn, "no corresponding CFI save for CFI restore");
+				return 1;
+			}
 
-					WARN_INSN(insn, "objtool isn't smart enough to handle this CFI save/restore combo");
-					return 1;
+			if (!save_insn->visited) {
+				/*
+				 * If the restore hint insn is at the
+				 * beginning of a basic block and was
+				 * branched to from elsewhere, and the
+				 * save insn hasn't been visited yet,
+				 * defer following this branch for now.
+				 * It will be seen later via the
+				 * straight-line path.
+				 */
+				if (!prev_insn) {
+					TRACE_INSN(insn, "defer restore");
+					return 0;
 				}
 
-				insn->cfi = save_insn->cfi;
-				nr_cfi_reused++;
+				WARN_INSN(insn, "objtool isn't smart enough to handle this CFI save/restore combo");
+				return 1;
 			}
 
-			state.cfi = *insn->cfi;
+			insn->cfi = save_insn->cfi;
+			nr_cfi_reused++;
+		}
+
+		statep->cfi = *insn->cfi;
+	} else {
+		/* XXX track if we actually changed statep->cfi */
+
+		if (prev_insn && !cficmp(prev_insn->cfi, &statep->cfi)) {
+			insn->cfi = prev_insn->cfi;
+			nr_cfi_reused++;
 		} else {
-			/* XXX track if we actually changed state.cfi */
+			insn->cfi = cfi_hash_find_or_add(&statep->cfi);
+		}
+	}
 
-			if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) {
-				insn->cfi = prev_insn->cfi;
-				nr_cfi_reused++;
-			} else {
-				insn->cfi = cfi_hash_find_or_add(&state.cfi);
+	insn->visited |= visited;
+
+	if (propagate_alt_cfi(file, insn))
+		return 1;
+
+	if (insn->alts) {
+		for (alt = insn->alts; alt; alt = alt->next) {
+			TRACE_ALT_BEGIN(insn, alt, alt_name);
+			ret = validate_branch(file, func, alt->insn, *statep);
+			TRACE_ALT_END(insn, alt, alt_name);
+			if (ret) {
+				BT_INSN(insn, "(alt)");
+				return ret;
 			}
 		}
+		TRACE_ALT_INFO_NOADDR(insn, "/ ", "DEFAULT");
+	}
+
+	if (skip_alt_group(insn))
+		return 0;
+
+	prev_state = *statep;
+	ret = handle_insn_ops(insn, next_insn, statep);
+	TRACE_INSN_STATE(insn, &prev_state, statep);
+
+	if (ret)
+		return 1;
+
+	switch (insn->type) {
+
+	case INSN_RETURN:
+		TRACE_INSN(insn, "return");
+		return validate_return(func, insn, statep);
+
+	case INSN_CALL:
+	case INSN_CALL_DYNAMIC:
+		if (insn->type == INSN_CALL)
+			TRACE_INSN(insn, "call");
+		else
+			TRACE_INSN(insn, "indirect call");
 
-		insn->visited |= visited;
+		ret = validate_call(file, insn, statep);
+		if (ret)
+			return ret;
 
-		if (propagate_alt_cfi(file, insn))
+		if (opts.stackval && func && !is_special_call(insn) &&
+		    !has_valid_stack_frame(statep)) {
+			WARN_INSN(insn, "call without frame pointer save/setup");
 			return 1;
+		}
 
-		if (insn->alts) {
-			for (alt = insn->alts; alt; alt = alt->next) {
-				ret = validate_branch(file, func, alt->insn, state);
-				if (ret) {
-					BT_INSN(insn, "(alt)");
-					return ret;
-				}
+		break;
+
+	case INSN_JUMP_CONDITIONAL:
+	case INSN_JUMP_UNCONDITIONAL:
+		if (is_sibling_call(insn)) {
+			TRACE_INSN(insn, "sibling call");
+			ret = validate_sibling_call(file, insn, statep);
+			if (ret)
+				return ret;
+
+		} else if (insn->jump_dest) {
+			if (insn->type == INSN_JUMP_UNCONDITIONAL)
+				TRACE_INSN(insn, "unconditional jump");
+			else
+				TRACE_INSN(insn, "jump taken");
+
+			ret = validate_branch(file, func, insn->jump_dest, *statep);
+			if (ret) {
+				BT_INSN(insn, "(branch)");
+				return ret;
 			}
 		}
 
-		if (skip_alt_group(insn))
+		if (insn->type == INSN_JUMP_UNCONDITIONAL)
 			return 0;
 
-		if (handle_insn_ops(insn, next_insn, &state))
-			return 1;
-
-		switch (insn->type) {
-
-		case INSN_RETURN:
-			return validate_return(func, insn, &state);
+		TRACE_INSN(insn, "jump not taken");
+		break;
 
-		case INSN_CALL:
-		case INSN_CALL_DYNAMIC:
-			ret = validate_call(file, insn, &state);
+	case INSN_JUMP_DYNAMIC:
+	case INSN_JUMP_DYNAMIC_CONDITIONAL:
+		TRACE_INSN(insn, "indirect jump");
+		if (is_sibling_call(insn)) {
+			ret = validate_sibling_call(file, insn, statep);
 			if (ret)
 				return ret;
+		}
 
-			if (opts.stackval && func && !is_special_call(insn) &&
-			    !has_valid_stack_frame(&state)) {
-				WARN_INSN(insn, "call without frame pointer save/setup");
-				return 1;
-			}
+		if (insn->type == INSN_JUMP_DYNAMIC)
+			return 0;
 
-			break;
+		break;
 
-		case INSN_JUMP_CONDITIONAL:
-		case INSN_JUMP_UNCONDITIONAL:
-			if (is_sibling_call(insn)) {
-				ret = validate_sibling_call(file, insn, &state);
-				if (ret)
-					return ret;
+	case INSN_SYSCALL:
+		TRACE_INSN(insn, "syscall");
+		if (func && (!next_insn || !next_insn->hint)) {
+			WARN_INSN(insn, "unsupported instruction in callable function");
+			return 1;
+		}
 
-			} else if (insn->jump_dest) {
-				ret = validate_branch(file, func,
-						      insn->jump_dest, state);
-				if (ret) {
-					BT_INSN(insn, "(branch)");
-					return ret;
-				}
-			}
+		break;
 
-			if (insn->type == INSN_JUMP_UNCONDITIONAL)
-				return 0;
+	case INSN_SYSRET:
+		TRACE_INSN(insn, "sysret");
+		if (func && (!next_insn || !next_insn->hint)) {
+			WARN_INSN(insn, "unsupported instruction in callable function");
+			return 1;
+		}
 
+		return 0;
+
+	case INSN_STAC:
+		TRACE_INSN(insn, "stac");
+		if (!opts.uaccess)
 			break;
 
-		case INSN_JUMP_DYNAMIC:
-		case INSN_JUMP_DYNAMIC_CONDITIONAL:
-			if (is_sibling_call(insn)) {
-				ret = validate_sibling_call(file, insn, &state);
-				if (ret)
-					return ret;
-			}
+		if (statep->uaccess) {
+			WARN_INSN(insn, "recursive UACCESS enable");
+			return 1;
+		}
 
-			if (insn->type == INSN_JUMP_DYNAMIC)
-				return 0;
+		statep->uaccess = true;
+		break;
 
+	case INSN_CLAC:
+		TRACE_INSN(insn, "clac");
+		if (!opts.uaccess)
 			break;
 
-		case INSN_SYSCALL:
-			if (func && (!next_insn || !next_insn->hint)) {
-				WARN_INSN(insn, "unsupported instruction in callable function");
-				return 1;
-			}
+		if (!statep->uaccess && func) {
+			WARN_INSN(insn, "redundant UACCESS disable");
+			return 1;
+		}
 
-			break;
+		if (func_uaccess_safe(func) && !statep->uaccess_stack) {
+			WARN_INSN(insn, "UACCESS-safe disables UACCESS");
+			return 1;
+		}
 
-		case INSN_SYSRET:
-			if (func && (!next_insn || !next_insn->hint)) {
-				WARN_INSN(insn, "unsupported instruction in callable function");
-				return 1;
-			}
+		statep->uaccess = false;
+		break;
 
-			return 0;
+	case INSN_STD:
+		TRACE_INSN(insn, "std");
+		if (statep->df) {
+			WARN_INSN(insn, "recursive STD");
+			return 1;
+		}
 
-		case INSN_STAC:
-			if (!opts.uaccess)
-				break;
+		statep->df = true;
+		break;
 
-			if (state.uaccess) {
-				WARN_INSN(insn, "recursive UACCESS enable");
-				return 1;
-			}
+	case INSN_CLD:
+		TRACE_INSN(insn, "cld");
+		if (!statep->df && func) {
+			WARN_INSN(insn, "redundant CLD");
+			return 1;
+		}
 
-			state.uaccess = true;
-			break;
+		statep->df = false;
+		break;
 
-		case INSN_CLAC:
-			if (!opts.uaccess)
-				break;
+	default:
+		break;
+	}
 
-			if (!state.uaccess && func) {
-				WARN_INSN(insn, "redundant UACCESS disable");
-				return 1;
-			}
+	if (insn->dead_end)
+		TRACE_INSN(insn, "dead end");
 
-			if (func_uaccess_safe(func) && !state.uaccess_stack) {
-				WARN_INSN(insn, "UACCESS-safe disables UACCESS");
-				return 1;
-			}
+	*dead_end = insn->dead_end;
+	return 0;
+}
 
-			state.uaccess = false;
-			break;
+/*
+ * Follow the branch starting at the given instruction, and recursively follow
+ * any other branches (jumps).  Meanwhile, track the frame pointer state at
+ * each instruction and validate all the rules described in
+ * tools/objtool/Documentation/objtool.txt.
+ */
+static int do_validate_branch(struct objtool_file *file, struct symbol *func,
+			      struct instruction *insn, struct insn_state state)
+{
+	struct instruction *next_insn, *prev_insn = NULL;
+	bool dead_end;
+	int ret;
 
-		case INSN_STD:
-			if (state.df) {
-				WARN_INSN(insn, "recursive STD");
-				return 1;
-			}
+	if (func && func->ignore)
+		return 0;
 
-			state.df = true;
-			break;
+	do {
+		insn->trace = 0;
+		next_insn = next_insn_to_validate(file, insn);
 
-		case INSN_CLD:
-			if (!state.df && func) {
-				WARN_INSN(insn, "redundant CLD");
-				return 1;
-			}
+		if (opts.checksum && func && insn->sec)
+			checksum_update_insn(file, func, insn);
 
-			state.df = false;
-			break;
+		if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
+			/* Ignore KCFI type preambles, which always fall through */
+			if (is_prefix_func(func))
+				return 0;
 
-		default:
-			break;
+			if (file->ignore_unreachables)
+				return 0;
+
+			WARN("%s() falls through to next function %s()",
+			     func->name, insn_func(insn)->name);
+			func->warned = 1;
+
+			return 1;
 		}
 
-		if (insn->dead_end)
-			return 0;
+		ret = validate_insn(file, func, insn, &state, prev_insn, next_insn,
+				    &dead_end);
+
+		if (!insn->trace) {
+			if (ret)
+				TRACE_INSN(insn, "warning (%d)", ret);
+			else
+				TRACE_INSN(insn, NULL);
+		}
 
-		if (!next_insn) {
+		if (!dead_end && !next_insn) {
 			if (state.cfi.cfa.base == CFI_UNDEFINED)
 				return 0;
 			if (file->ignore_unreachables)
@@ -3798,15 +4007,28 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 
 			WARN("%s%sunexpected end of section %s",
 			     func ? func->name : "", func ? "(): " : "",
-			     sec->name);
+			     insn->sec->name);
 			return 1;
 		}
 
 		prev_insn = insn;
 		insn = next_insn;
-	}
 
-	return 0;
+	} while (!dead_end);
+
+	return ret;
+}
+
+static int validate_branch(struct objtool_file *file, struct symbol *func,
+			   struct instruction *insn, struct insn_state state)
+{
+	int ret;	/* result of do_validate_branch(), passed through unchanged */
+
+	trace_depth_inc();	/* NOTE(review): presumably nesting depth for trace output -- confirm */
+	ret = do_validate_branch(file, func, insn, state);	/* the actual walk; may re-enter via validate_insn() */
+	trace_depth_dec();	/* always paired with the increment above */
+
+	return ret;
+}
 
 static int validate_unwind_hint(struct objtool_file *file,
@@ -3814,7 +4036,13 @@ static int validate_unwind_hint(struct objtool_file *file,
 				  struct insn_state *state)
 {
 	if (insn->hint && !insn->visited) {
-		int ret = validate_branch(file, insn_func(insn), insn, *state);
+		struct symbol *func = insn_func(insn);
+		int ret;
+
+		if (opts.checksum)
+			checksum_init(func);
+
+		ret = validate_branch(file, func, insn, *state);
 		if (ret)
 			BT_INSN(insn, "<=== (hint)");
 		return ret;
@@ -4058,7 +4286,8 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 	struct instruction *prev_insn;
 	int i;
 
-	if (insn->type == INSN_NOP || insn->type == INSN_TRAP || (func && func->ignore))
+	if (insn->type == INSN_NOP || insn->type == INSN_TRAP ||
+	    insn->hole || (func && func->ignore))
 		return true;
 
 	/*
@@ -4069,47 +4298,6 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 	    !strcmp(insn->sec->name, ".altinstr_aux"))
 		return true;
 
-	/*
-	 * Whole archive runs might encounter dead code from weak symbols.
-	 * This is where the linker will have dropped the weak symbol in
-	 * favour of a regular symbol, but leaves the code in place.
-	 *
-	 * In this case we'll find a piece of code (whole function) that is not
-	 * covered by a !section symbol. Ignore them.
-	 */
-	if (opts.link && !func) {
-		int size = find_symbol_hole_containing(insn->sec, insn->offset);
-		unsigned long end = insn->offset + size;
-
-		if (!size) /* not a hole */
-			return false;
-
-		if (size < 0) /* hole until the end */
-			return true;
-
-		sec_for_each_insn_continue(file, insn) {
-			/*
-			 * If we reach a visited instruction at or before the
-			 * end of the hole, ignore the unreachable.
-			 */
-			if (insn->visited)
-				return true;
-
-			if (insn->offset >= end)
-				break;
-
-			/*
-			 * If this hole jumps to a .cold function, mark it ignore too.
-			 */
-			if (insn->jump_dest && insn_func(insn->jump_dest) &&
-			    strstr(insn_func(insn->jump_dest)->name, ".cold")) {
-				insn_func(insn->jump_dest)->ignore = true;
-			}
-		}
-
-		return false;
-	}
-
 	if (!func)
 		return false;
 
@@ -4161,14 +4349,54 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 	return false;
 }
 
-static int add_prefix_symbol(struct objtool_file *file, struct symbol *func)
+/*
+ * For FineIBT or kCFI, a certain number of bytes preceding the function may be
+ * NOPs.  Those NOPs may be rewritten at runtime and executed, so give them a
+ * proper function name: __pfx_<func>.
+ *
+ * The NOPs may not exist for the following cases:
+ *
+ *   - compiler-cloned functions (*.cold, *.part.0, etc.)
+ *   - asm functions created with inline asm or without SYM_FUNC_START()
+ *
+ * Also, the function may already have a prefix from a previous objtool run
+ * (livepatch extracted functions, or manually running objtool multiple times).
+ *
+ * So return 0 if the NOPs are missing or the function already has a prefix
+ * symbol.
+ */
+static int create_prefix_symbol(struct objtool_file *file, struct symbol *func)
 {
 	struct instruction *insn, *prev;
+	char name[SYM_NAME_LEN];
 	struct cfi_state *cfi;
 
+	if (!is_func_sym(func) || is_prefix_func(func) ||
+	    func->cold || func->static_call_tramp)
+		return 0;
+
+	if ((strlen(func->name) + sizeof("__pfx_") > SYM_NAME_LEN)) {
+		WARN("%s: symbol name too long, can't create __pfx_ symbol",
+		      func->name);
+		return 0;
+	}
+
+	if (snprintf_check(name, SYM_NAME_LEN, "__pfx_%s", func->name))
+		return -1;
+
+	if (file->klp) {
+		struct symbol *pfx;
+
+		pfx = find_symbol_by_offset(func->sec, func->offset - opts.prefix);
+		if (pfx && is_prefix_func(pfx) && !strcmp(pfx->name, name))
+			return 0;
+	}
+
 	insn = find_insn(file, func->sec, func->offset);
-	if (!insn)
+	if (!insn) {
+		WARN("%s: can't find starting instruction", func->name);
 		return -1;
+	}
 
 	for (prev = prev_insn_same_sec(file, insn);
 	     prev;
@@ -4176,22 +4404,27 @@ static int add_prefix_symbol(struct objtool_file *file, struct symbol *func)
 		u64 offset;
 
 		if (prev->type != INSN_NOP)
-			return -1;
+			return 0;
 
 		offset = func->offset - prev->offset;
 
 		if (offset > opts.prefix)
-			return -1;
+			return 0;
 
 		if (offset < opts.prefix)
 			continue;
 
-		elf_create_prefix_symbol(file->elf, func, opts.prefix);
+		if (!elf_create_symbol(file->elf, name, func->sec,
+				       GELF_ST_BIND(func->sym.st_info),
+				       GELF_ST_TYPE(func->sym.st_info),
+				       prev->offset, opts.prefix))
+			return -1;
+
 		break;
 	}
 
 	if (!prev)
-		return -1;
+		return 0;
 
 	if (!insn->cfi) {
 		/*
@@ -4209,20 +4442,18 @@ static int add_prefix_symbol(struct objtool_file *file, struct symbol *func)
 	return 0;
 }
 
-static int add_prefix_symbols(struct objtool_file *file)
+static int create_prefix_symbols(struct objtool_file *file)
 {
 	struct section *sec;
 	struct symbol *func;
 
-	for_each_sec(file, sec) {
-		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+	for_each_sec(file->elf, sec) {
+		if (!is_text_sec(sec))
 			continue;
 
 		sec_for_each_sym(sec, func) {
-			if (func->type != STT_FUNC)
-				continue;
-
-			add_prefix_symbol(file, func);
+			if (create_prefix_symbol(file, func))
+				return -1;
 		}
 	}
 
@@ -4233,6 +4464,7 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
 			   struct symbol *sym, struct insn_state *state)
 {
 	struct instruction *insn;
+	struct symbol *func;
 	int ret;
 
 	if (!sym->len) {
@@ -4250,9 +4482,26 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
 	if (opts.uaccess)
 		state->uaccess = sym->uaccess_safe;
 
-	ret = validate_branch(file, insn_func(insn), insn, *state);
+	func = insn_func(insn);
+
+	if (opts.checksum)
+		checksum_init(func);
+
+	if (opts.trace && !fnmatch(opts.trace, sym->name, 0)) {
+		trace_enable();
+		TRACE("%s: validation begin\n", sym->name);
+	}
+
+	ret = validate_branch(file, func, insn, *state);
 	if (ret)
 		BT_INSN(insn, "<=== (sym)");
+
+	TRACE("%s: validation %s\n\n", sym->name, ret ? "failed" : "end");
+	trace_disable();
+
+	if (opts.checksum)
+		checksum_finish(func);
+
 	return ret;
 }
 
@@ -4263,7 +4512,7 @@ static int validate_section(struct objtool_file *file, struct section *sec)
 	int warnings = 0;
 
 	sec_for_each_sym(sec, func) {
-		if (func->type != STT_FUNC)
+		if (!is_func_sym(func))
 			continue;
 
 		init_insn_state(file, &state, sec);
@@ -4306,8 +4555,8 @@ static int validate_functions(struct objtool_file *file)
 	struct section *sec;
 	int warnings = 0;
 
-	for_each_sec(file, sec) {
-		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+	for_each_sec(file->elf, sec) {
+		if (!is_text_sec(sec))
 			continue;
 
 		warnings += validate_section(file, sec);
@@ -4434,12 +4683,7 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn
 					      reloc_offset(reloc) + 1,
 					      (insn->offset + insn->len) - (reloc_offset(reloc) + 1))) {
 
-		off = reloc->sym->offset;
-		if (reloc_type(reloc) == R_X86_64_PC32 ||
-		    reloc_type(reloc) == R_X86_64_PLT32)
-			off += arch_dest_reloc_offset(reloc_addend(reloc));
-		else
-			off += reloc_addend(reloc);
+		off = reloc->sym->offset + arch_insn_adjusted_addend(insn, reloc);
 
 		dest = find_insn(file, reloc->sym->sec, off);
 		if (!dest)
@@ -4490,10 +4734,10 @@ static int validate_ibt(struct objtool_file *file)
 	for_each_insn(file, insn)
 		warnings += validate_ibt_insn(file, insn);
 
-	for_each_sec(file, sec) {
+	for_each_sec(file->elf, sec) {
 
 		/* Already done by validate_ibt_insn() */
-		if (sec->sh.sh_flags & SHF_EXECINSTR)
+		if (is_text_sec(sec))
 			continue;
 
 		if (!sec->rsec)
@@ -4508,8 +4752,8 @@ static int validate_ibt(struct objtool_file *file)
 		    !strncmp(sec->name, ".debug", 6)			||
 		    !strcmp(sec->name, ".altinstructions")		||
 		    !strcmp(sec->name, ".ibt_endbr_seal")		||
+		    !strcmp(sec->name, ".kcfi_traps")			||
 		    !strcmp(sec->name, ".orc_unwind_ip")		||
-		    !strcmp(sec->name, ".parainstructions")		||
 		    !strcmp(sec->name, ".retpoline_sites")		||
 		    !strcmp(sec->name, ".smp_locks")			||
 		    !strcmp(sec->name, ".static_call_sites")		||
@@ -4518,12 +4762,14 @@ static int validate_ibt(struct objtool_file *file)
 		    !strcmp(sec->name, "__bug_table")			||
 		    !strcmp(sec->name, "__ex_table")			||
 		    !strcmp(sec->name, "__jump_table")			||
+		    !strcmp(sec->name, "__klp_funcs")			||
 		    !strcmp(sec->name, "__mcount_loc")			||
-		    !strcmp(sec->name, ".kcfi_traps")			||
 		    !strcmp(sec->name, ".llvm.call-graph-profile")	||
 		    !strcmp(sec->name, ".llvm_bb_addr_map")		||
 		    !strcmp(sec->name, "__tracepoints")			||
-		    strstr(sec->name, "__patchable_function_entries"))
+		    !strcmp(sec->name, ".return_sites")			||
+		    !strcmp(sec->name, ".call_sites")			||
+		    !strcmp(sec->name, "__patchable_function_entries"))
 			continue;
 
 		for_each_reloc(sec->rsec, reloc)
@@ -4597,87 +4843,6 @@ static int validate_reachable_instructions(struct objtool_file *file)
 	return warnings;
 }
 
-/* 'funcs' is a space-separated list of function names */
-static void disas_funcs(const char *funcs)
-{
-	const char *objdump_str, *cross_compile;
-	int size, ret;
-	char *cmd;
-
-	cross_compile = getenv("CROSS_COMPILE");
-	if (!cross_compile)
-		cross_compile = "";
-
-	objdump_str = "%sobjdump -wdr %s | gawk -M -v _funcs='%s' '"
-			"BEGIN { split(_funcs, funcs); }"
-			"/^$/ { func_match = 0; }"
-			"/<.*>:/ { "
-				"f = gensub(/.*<(.*)>:/, \"\\\\1\", 1);"
-				"for (i in funcs) {"
-					"if (funcs[i] == f) {"
-						"func_match = 1;"
-						"base = strtonum(\"0x\" $1);"
-						"break;"
-					"}"
-				"}"
-			"}"
-			"{"
-				"if (func_match) {"
-					"addr = strtonum(\"0x\" $1);"
-					"printf(\"%%04x \", addr - base);"
-					"print;"
-				"}"
-			"}' 1>&2";
-
-	/* fake snprintf() to calculate the size */
-	size = snprintf(NULL, 0, objdump_str, cross_compile, objname, funcs) + 1;
-	if (size <= 0) {
-		WARN("objdump string size calculation failed");
-		return;
-	}
-
-	cmd = malloc(size);
-
-	/* real snprintf() */
-	snprintf(cmd, size, objdump_str, cross_compile, objname, funcs);
-	ret = system(cmd);
-	if (ret) {
-		WARN("disassembly failed: %d", ret);
-		return;
-	}
-}
-
-static void disas_warned_funcs(struct objtool_file *file)
-{
-	struct symbol *sym;
-	char *funcs = NULL, *tmp;
-
-	for_each_sym(file, sym) {
-		if (sym->warned) {
-			if (!funcs) {
-				funcs = malloc(strlen(sym->name) + 1);
-				if (!funcs) {
-					ERROR_GLIBC("malloc");
-					return;
-				}
-				strcpy(funcs, sym->name);
-			} else {
-				tmp = malloc(strlen(funcs) + strlen(sym->name) + 2);
-				if (!tmp) {
-					ERROR_GLIBC("malloc");
-					return;
-				}
-				sprintf(tmp, "%s %s", funcs, sym->name);
-				free(funcs);
-				funcs = tmp;
-			}
-		}
-	}
-
-	if (funcs)
-		disas_funcs(funcs);
-}
-
 __weak bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
 {
 	unsigned int type = reloc_type(reloc);
@@ -4692,7 +4857,7 @@ static int check_abs_references(struct objtool_file *file)
 	struct reloc *reloc;
 	int ret = 0;
 
-	for_each_sec(file, sec) {
+	for_each_sec(file->elf, sec) {
 		/* absolute references in non-loadable sections are fine */
 		if (!(sec->sh.sh_flags & SHF_ALLOC))
 			continue;
@@ -4710,8 +4875,8 @@ static int check_abs_references(struct objtool_file *file)
 
 		for_each_reloc(sec->rsec, reloc) {
 			if (arch_absolute_reloc(file->elf, reloc)) {
-				WARN("section %s has absolute relocation at offset 0x%lx",
-				     sec->name, reloc_offset(reloc));
+				WARN("section %s has absolute relocation at offset 0x%llx",
+				     sec->name, (unsigned long long)reloc_offset(reloc));
 				ret++;
 			}
 		}
@@ -4747,10 +4912,35 @@ static void free_insns(struct objtool_file *file)
 		free(chunk->addr);
 }
 
+const char *objtool_disas_insn(struct instruction *insn)
+{
+	struct disas_context *dctx = objtool_disas_ctx;
+
+	if (!dctx)
+		return "";
+
+	disas_insn(dctx, insn);
+	return disas_result(dctx);
+}
+
 int check(struct objtool_file *file)
 {
+	struct disas_context *disas_ctx = NULL;
 	int ret = 0, warnings = 0;
 
+	/*
+	 * Create a disassembly context if we might disassemble any
+	 * instruction or function.
+	 */
+	if (opts.verbose || opts.backtrace || opts.trace || opts.disas) {
+		disas_ctx = disas_context_create(file);
+		if (!disas_ctx) {
+			opts.disas = false;
+			opts.trace = false;
+		}
+		objtool_disas_ctx = disas_ctx;
+	}
+
 	arch_initial_func_cfi_state(&initial_func_cfi);
 	init_cfi_state(&init_cfi);
 	init_cfi_state(&func_cfi);
@@ -4766,6 +4956,10 @@ int check(struct objtool_file *file)
 	cfi_hash_add(&init_cfi);
 	cfi_hash_add(&func_cfi);
 
+	ret = checksum_debug_init(file);
+	if (ret)
+		goto out;
+
 	ret = decode_sections(file);
 	if (ret)
 		goto out;
@@ -4776,7 +4970,7 @@ int check(struct objtool_file *file)
 	if (opts.retpoline)
 		warnings += validate_retpoline(file);
 
-	if (opts.stackval || opts.orc || opts.uaccess) {
+	if (validate_branch_enabled()) {
 		int w = 0;
 
 		w += validate_functions(file);
@@ -4841,7 +5035,7 @@ int check(struct objtool_file *file)
 	}
 
 	if (opts.prefix) {
-		ret = add_prefix_symbols(file);
+		ret = create_prefix_symbols(file);
 		if (ret)
 			goto out;
 	}
@@ -4855,14 +5049,18 @@ int check(struct objtool_file *file)
 	if (opts.noabs)
 		warnings += check_abs_references(file);
 
+	if (opts.checksum) {
+		ret = create_sym_checksum_section(file);
+		if (ret)
+			goto out;
+	}
+
 	if (opts.orc && nr_insns) {
 		ret = orc_create(file);
 		if (ret)
 			goto out;
 	}
 
-	free_insns(file);
-
 	if (opts.stats) {
 		printf("nr_insns_visited: %ld\n", nr_insns_visited);
 		printf("nr_cfi: %ld\n", nr_cfi);
@@ -4871,18 +5069,32 @@ int check(struct objtool_file *file)
 	}
 
 out:
-	if (!ret && !warnings)
-		return 0;
+	if (ret || warnings) {
+		if (opts.werror && warnings)
+			ret = 1;
+
+		if (opts.verbose) {
+			if (opts.werror && warnings)
+				WARN("%d warning(s) upgraded to errors", warnings);
+			disas_warned_funcs(disas_ctx);
+		}
+	}
 
-	if (opts.werror && warnings)
-		ret = 1;
+	if (opts.disas)
+		disas_funcs(disas_ctx);
 
-	if (opts.verbose) {
-		if (opts.werror && warnings)
-			WARN("%d warning(s) upgraded to errors", warnings);
-		print_args();
-		disas_warned_funcs(file);
+	if (disas_ctx) {
+		disas_context_destroy(disas_ctx);
+		objtool_disas_ctx = NULL;
 	}
 
+	free_insns(file);
+
+	if (!ret && !warnings)
+		return 0;
+
+	if (opts.backup && make_backup())
+		return 1;
+
 	return ret;
 }
diff --git a/tools/objtool/disas.c b/tools/objtool/disas.c
new file mode 100644
index 000000000000..2b5059f55e40
--- /dev/null
+++ b/tools/objtool/disas.c
@@ -0,0 +1,1248 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ */
+
+#define _GNU_SOURCE
+#include <fnmatch.h>
+
+#include <objtool/arch.h>
+#include <objtool/check.h>
+#include <objtool/disas.h>
+#include <objtool/special.h>
+#include <objtool/warn.h>
+
+#include <bfd.h>
+#include <linux/string.h>
+#include <tools/dis-asm-compat.h>
+
+/*
+ * Size of the buffer for storing the result of disassembling
+ * a single instruction.
+ */
+#define DISAS_RESULT_SIZE	1024
+
+struct disas_context {
+	struct objtool_file *file;
+	struct instruction *insn;
+	bool alt_applied;
+	char result[DISAS_RESULT_SIZE];
+	disassembler_ftype disassembler;
+	struct disassemble_info info;
+};
+
+/*
+ * Maximum number of alternatives
+ */
+#define DISAS_ALT_MAX		5
+
+/*
+ * Maximum number of instructions per alternative
+ */
+#define DISAS_ALT_INSN_MAX	50
+
+/*
+ * Information to disassemble an alternative
+ */
+struct disas_alt {
+	struct instruction *orig_insn;		/* original instruction */
+	struct alternative *alt;		/* alternative or NULL if default code */
+	char *name;				/* name for this alternative */
+	int width;				/* formatting width */
+	struct {
+		char *str;			/* instruction string */
+		int offset;			/* instruction offset */
+		int nops;			/* number of nops */
+	} insn[DISAS_ALT_INSN_MAX];		/* alternative instructions */
+	int insn_idx;				/* index of the next instruction to print */
+};
+
+#define DALT_DEFAULT(dalt)	(!(dalt)->alt)
+#define DALT_INSN(dalt)		(DALT_DEFAULT(dalt) ? (dalt)->orig_insn : (dalt)->alt->insn)
+#define DALT_GROUP(dalt)	(DALT_INSN(dalt)->alt_group)
+#define DALT_ALTID(dalt)	((dalt)->orig_insn->offset)
+
+#define ALT_FLAGS_SHIFT		16
+#define ALT_FLAG_NOT		(1 << 0)
+#define ALT_FLAG_DIRECT_CALL	(1 << 1)
+#define ALT_FEATURE_MASK	((1 << ALT_FLAGS_SHIFT) - 1)
+
+static int alt_feature(unsigned int ft_flags)
+{
+	return (ft_flags & ALT_FEATURE_MASK);
+}
+
+static int alt_flags(unsigned int ft_flags)
+{
+	return (ft_flags >> ALT_FLAGS_SHIFT);
+}
+
+/*
+ * Wrapper around asprintf() to allocate and format a string.
+ * Return the allocated string or NULL on error.
+ */
+static char *strfmt(const char *fmt, ...)
+{
+	va_list ap;
+	char *str;
+	int rv;
+
+	va_start(ap, fmt);
+	rv = vasprintf(&str, fmt, ap);
+	va_end(ap);
+
+	return rv == -1 ? NULL : str;
+}
+
+static int sprint_name(char *str, const char *name, unsigned long offset)
+{
+	int len;
+
+	if (offset)
+		len = sprintf(str, "%s+0x%lx", name, offset);
+	else
+		len = sprintf(str, "%s", name);
+
+	return len;
+}
+
+#define DINFO_FPRINTF(dinfo, ...)	\
+	((*(dinfo)->fprintf_func)((dinfo)->stream, __VA_ARGS__))
+
+static int disas_result_fprintf(struct disas_context *dctx,
+				const char *fmt, va_list ap)
+{
+	char *buf = dctx->result;
+	int avail, len;
+
+	len = strlen(buf);
+	if (len >= DISAS_RESULT_SIZE - 1) {
+		WARN_FUNC(dctx->insn->sec, dctx->insn->offset,
+			  "disassembly buffer is full");
+		return -1;
+	}
+	avail = DISAS_RESULT_SIZE - len;
+
+	len = vsnprintf(buf + len, avail, fmt, ap);
+	if (len < 0 || len >= avail) {
+		WARN_FUNC(dctx->insn->sec, dctx->insn->offset,
+			  "disassembly buffer is truncated");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int disas_fprintf(void *stream, const char *fmt, ...)
+{
+	va_list arg;
+	int rv;
+
+	va_start(arg, fmt);
+	rv = disas_result_fprintf(stream, fmt, arg);
+	va_end(arg);
+
+	return rv;
+}
+
+/*
+ * For init_disassemble_info_compat().
+ */
+static int disas_fprintf_styled(void *stream,
+				enum disassembler_style style,
+				const char *fmt, ...)
+{
+	va_list arg;
+	int rv;
+
+	va_start(arg, fmt);
+	rv = disas_result_fprintf(stream, fmt, arg);
+	va_end(arg);
+
+	return rv;
+}
+
+static void disas_print_addr_sym(struct section *sec, struct symbol *sym,
+				 bfd_vma addr, struct disassemble_info *dinfo)
+{
+	char symstr[1024];
+	char *str;
+
+	if (sym) {
+		sprint_name(symstr, sym->name, addr - sym->offset);
+		DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, symstr);
+	} else {
+		str = offstr(sec, addr);
+		DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, str);
+		free(str);
+	}
+}
+
+static bool disas_print_addr_alt(bfd_vma addr, struct disassemble_info *dinfo)
+{
+	struct disas_context *dctx = dinfo->application_data;
+	struct instruction *orig_first_insn;
+	struct alt_group *alt_group;
+	unsigned long offset;
+	struct symbol *sym;
+
+	/*
+	 * Check if we are processing an alternative at the original
+	 * instruction address (i.e. if alt_applied is true) and if
+	 * we are referencing an address inside the alternative.
+	 *
+	 * For example, this happens if there is a branch inside an
+	 * alternative. In that case, the address should be updated
+	 * to a reference inside the original instruction flow.
+	 */
+	if (!dctx->alt_applied)
+		return false;
+
+	alt_group = dctx->insn->alt_group;
+	if (!alt_group || !alt_group->orig_group ||
+	    addr < alt_group->first_insn->offset ||
+	    addr > alt_group->last_insn->offset)
+		return false;
+
+	orig_first_insn = alt_group->orig_group->first_insn;
+	offset = addr - alt_group->first_insn->offset;
+
+	addr = orig_first_insn->offset + offset;
+	sym = orig_first_insn->sym;
+
+	disas_print_addr_sym(orig_first_insn->sec, sym, addr, dinfo);
+
+	return true;
+}
+
+static void disas_print_addr_noreloc(bfd_vma addr,
+				     struct disassemble_info *dinfo)
+{
+	struct disas_context *dctx = dinfo->application_data;
+	struct instruction *insn = dctx->insn;
+	struct symbol *sym = NULL;
+
+	if (disas_print_addr_alt(addr, dinfo))
+		return;
+
+	if (insn->sym && addr >= insn->sym->offset &&
+	    addr < insn->sym->offset + insn->sym->len) {
+		sym = insn->sym;
+	}
+
+	disas_print_addr_sym(insn->sec, sym, addr, dinfo);
+}
+
+static void disas_print_addr_reloc(bfd_vma addr, struct disassemble_info *dinfo)
+{
+	struct disas_context *dctx = dinfo->application_data;
+	struct instruction *insn = dctx->insn;
+	unsigned long offset;
+	struct reloc *reloc;
+	char symstr[1024];
+	char *str;
+
+	reloc = find_reloc_by_dest_range(dctx->file->elf, insn->sec,
+					 insn->offset, insn->len);
+	if (!reloc) {
+		/*
+		 * There is no relocation for this instruction although
+		 * the address to resolve points to the next instruction.
+		 * So this is an effective reference to the next IP, for
+		 * example: "lea 0x0(%rip),%rdi". The kernel can reference
+		 * the next IP with _THIS_IP_ macro.
+		 */
+		DINFO_FPRINTF(dinfo, "0x%lx <_THIS_IP_>", addr);
+		return;
+	}
+
+	offset = arch_insn_adjusted_addend(insn, reloc);
+
+	/*
+	 * If the relocation symbol is a section name (for example ".bss")
+	 * then we try to further resolve the name.
+	 */
+	if (reloc->sym->type == STT_SECTION) {
+		str = offstr(reloc->sym->sec, reloc->sym->offset + offset);
+		DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, str);
+		free(str);
+	} else {
+		sprint_name(symstr, reloc->sym->name, offset);
+		DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, symstr);
+	}
+}
+
+/*
+ * Resolve an address into a "<symbol>+<offset>" string.
+ */
+static void disas_print_address(bfd_vma addr, struct disassemble_info *dinfo)
+{
+	struct disas_context *dctx = dinfo->application_data;
+	struct instruction *insn = dctx->insn;
+	struct instruction *jump_dest;
+	struct symbol *sym;
+	bool is_reloc;
+
+	/*
+	 * If the instruction is a call/jump and it references a
+	 * destination then this is likely the address we are looking
+	 * up. So check it first.
+	 */
+	jump_dest = insn->jump_dest;
+	if (jump_dest && jump_dest->sym && jump_dest->offset == addr) {
+		if (!disas_print_addr_alt(addr, dinfo))
+			disas_print_addr_sym(jump_dest->sec, jump_dest->sym,
+					     addr, dinfo);
+		return;
+	}
+
+	/*
+	 * If the address points to the next instruction then there is
+	 * probably a relocation. It can be a false positive when the
+	 * current instruction is referencing the address of the next
+	 * instruction. This particular case will be handled in
+	 * disas_print_addr_reloc().
+	 */
+	is_reloc = (addr == insn->offset + insn->len);
+
+	/*
+	 * The call destination offset can be the address we are looking
+	 * up, or 0 if there is a relocation.
+	 */
+	sym = insn_call_dest(insn);
+	if (sym && (sym->offset == addr || (sym->offset == 0 && is_reloc))) {
+		DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, sym->name);
+		return;
+	}
+
+	if (!is_reloc)
+		disas_print_addr_noreloc(addr, dinfo);
+	else
+		disas_print_addr_reloc(addr, dinfo);
+}
+
+/*
+ * Initialize disassemble info arch, mach (32 or 64-bit) and options.
+ */
+int disas_info_init(struct disassemble_info *dinfo,
+		    int arch, int mach32, int mach64,
+		    const char *options)
+{
+	struct disas_context *dctx = dinfo->application_data;
+	struct objtool_file *file = dctx->file;
+
+	dinfo->arch = arch;
+
+	switch (file->elf->ehdr.e_ident[EI_CLASS]) {
+	case ELFCLASS32:
+		dinfo->mach = mach32;
+		break;
+	case ELFCLASS64:
+		dinfo->mach = mach64;
+		break;
+	default:
+		return -1;
+	}
+
+	dinfo->disassembler_options = options;
+
+	return 0;
+}
+
+/* Create a disassembly context for @file. Return NULL on failure. */
+struct disas_context *disas_context_create(struct objtool_file *file)
+{
+	struct disas_context *dctx;
+	struct disassemble_info *dinfo;
+	int err;
+
+	/* zeroed so that result, insn and alt_applied start out defined */
+	dctx = calloc(1, sizeof(*dctx));
+	if (!dctx) {
+		WARN("failed to allocate disassembly context");
+		return NULL;
+	}
+
+	dctx->file = file;
+	dinfo = &dctx->info;
+
+	init_disassemble_info_compat(dinfo, dctx,
+				     disas_fprintf, disas_fprintf_styled);
+
+	dinfo->read_memory_func = buffer_read_memory;
+	dinfo->print_address_func = disas_print_address;
+	dinfo->application_data = dctx;
+
+	/*
+	 * bfd_openr() is not used to avoid doing ELF data processing
+	 * and caching that has already been done. Here, we just need
+	 * to identify the target file so we call an arch specific
+	 * function to fill some disassemble info (arch, mach).
+	 */
+	dinfo->arch = bfd_arch_unknown;
+	dinfo->mach = 0;
+
+	err = arch_disas_info_init(dinfo);
+	if (err || dinfo->arch == bfd_arch_unknown || dinfo->mach == 0) {
+		WARN("failed to init disassembly arch");
+		goto error;
+	}
+
+	dinfo->endian = (file->elf->ehdr.e_ident[EI_DATA] == ELFDATA2MSB) ?
+		BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
+	disassemble_init_for_target(dinfo);
+
+	dctx->disassembler = disassembler(dinfo->arch,
+					  dinfo->endian == BFD_ENDIAN_BIG,
+					  dinfo->mach, NULL);
+	if (!dctx->disassembler) {
+		WARN("failed to create disassembler function");
+		goto error;
+	}
+
+	return dctx;
+
+error:
+	free(dctx);
+	return NULL;
+}
+
+void disas_context_destroy(struct disas_context *dctx)
+{
+	free(dctx);
+}
+
+char *disas_result(struct disas_context *dctx)
+{
+	return dctx->result;
+}
+
+#define DISAS_INSN_OFFSET_SPACE		10
+#define DISAS_INSN_SPACE		60
+
+#define DISAS_PRINSN(dctx, insn, depth)			\
+	disas_print_insn(stdout, dctx, insn, depth, "\n")
+
+/*
+ * Print a message in the instruction flow. If sec is not NULL then the
+ * address at the section offset is printed in addition to the message,
+ * otherwise only the message is printed.
+ */
+static int disas_vprint(FILE *stream, struct section *sec, unsigned long offset,
+			int depth, const char *format, va_list ap)
+{
+	const char *addr_str;
+	int i, n;
+	int len;
+
+	len = sym_name_max_len + DISAS_INSN_OFFSET_SPACE;
+	if (depth < 0) {
+		len += depth;
+		depth = 0;
+	}
+
+	n = 0;
+
+	if (sec) {
+		addr_str = offstr(sec, offset);
+		n += fprintf(stream, "%6lx:  %-*s  ", offset, len, addr_str);
+		free((char *)addr_str);
+	} else {
+		len += DISAS_INSN_OFFSET_SPACE + 1;
+		n += fprintf(stream, "%-*s", len, "");
+	}
+
+	/* print vertical bars to show the code flow */
+	for (i = 0; i < depth; i++)
+		n += fprintf(stream, "| ");
+
+	if (format)
+		n += vfprintf(stream, format, ap);
+
+	return n;
+}
+
+static int disas_print(FILE *stream, struct section *sec, unsigned long offset,
+			int depth, const char *format, ...)
+{
+	va_list args;
+	int len;
+
+	va_start(args, format);
+	len = disas_vprint(stream, sec, offset, depth, format, args);
+	va_end(args);
+
+	return len;
+}
+
+/*
+ * Print a message in the instruction flow. If insn is not NULL then
+ * the instruction address is printed in addition to the message,
+ * otherwise only the message is printed. In all cases, the instruction
+ * itself is not printed.
+ */
+void disas_print_info(FILE *stream, struct instruction *insn, int depth,
+		      const char *format, ...)
+{
+	struct section *sec;
+	unsigned long off;
+	va_list args;
+
+	if (insn) {
+		sec = insn->sec;
+		off = insn->offset;
+	} else {
+		sec = NULL;
+		off = 0;
+	}
+
+	va_start(args, format);
+	disas_vprint(stream, sec, off, depth, format, args);
+	va_end(args);
+}
+
+/*
+ * Print an instruction address (offset and function), the instruction itself
+ * and an optional message.
+ */
+void disas_print_insn(FILE *stream, struct disas_context *dctx,
+		      struct instruction *insn, int depth,
+		      const char *format, ...)
+{
+	char fake_nop_insn[32];
+	const char *insn_str;
+	bool fake_nop;
+	va_list args;
+	int len;
+
+	/*
+	 * Alternative can insert a fake nop, sometimes with no
+	 * associated section so nothing to disassemble.
+	 */
+	fake_nop = (!insn->sec && insn->type == INSN_NOP);
+	if (fake_nop) {
+		snprintf(fake_nop_insn, 32, "<fake nop> (%d bytes)", insn->len);
+		insn_str = fake_nop_insn;
+	} else {
+		disas_insn(dctx, insn);
+		insn_str = disas_result(dctx);
+	}
+
+	/* print the instruction */
+	len = (depth + 1) * 2 < DISAS_INSN_SPACE ? DISAS_INSN_SPACE - (depth+1) * 2 : 1;
+	disas_print_info(stream, insn, depth, "%-*s", len, insn_str);
+
+	/* print message if any */
+	if (!format)
+		return;
+
+	if (strcmp(format, "\n") == 0) {
+		fprintf(stream, "\n");
+		return;
+	}
+
+	fprintf(stream, " - ");
+	va_start(args, format);
+	vfprintf(stream, format, args);
+	va_end(args);
+}
+
+/*
+ * Disassemble a single instruction. Return the size of the instruction.
+ *
+ * If alt_applied is true then insn should be an instruction from an
+ * alternative (i.e. insn->alt_group != NULL), and it is disassembled
+ * at the location of the original code it is replacing. When the
+ * instruction references any address inside the alternative then
+ * these references will be re-adjusted to replace the original code.
+ */
+static size_t disas_insn_common(struct disas_context *dctx,
+				struct instruction *insn,
+				bool alt_applied)
+{
+	disassembler_ftype disasm = dctx->disassembler;
+	struct disassemble_info *dinfo = &dctx->info;
+
+	dctx->insn = insn;
+	dctx->alt_applied = alt_applied;
+	dctx->result[0] = '\0';
+
+	if (insn->type == INSN_NOP) {
+		DINFO_FPRINTF(dinfo, "nop%d", insn->len);
+		return insn->len;
+	}
+
+	/*
+	 * Set the disassembler buffer to read data from the section
+	 * containing the instruction to disassemble.
+	 */
+	dinfo->buffer = insn->sec->data->d_buf;
+	dinfo->buffer_vma = 0;
+	dinfo->buffer_length = insn->sec->sh.sh_size;
+
+	return disasm(insn->offset, &dctx->info);
+}
+
+size_t disas_insn(struct disas_context *dctx, struct instruction *insn)
+{
+	return disas_insn_common(dctx, insn, false);
+}
+
+static size_t disas_insn_alt(struct disas_context *dctx,
+			     struct instruction *insn)
+{
+	return disas_insn_common(dctx, insn, true);
+}
+
+static struct instruction *next_insn_same_alt(struct objtool_file *file,
+					      struct alt_group *alt_grp,
+					      struct instruction *insn)
+{
+	if (alt_grp->last_insn == insn || alt_grp->nop == insn)
+		return NULL;
+
+	return next_insn_same_sec(file, insn);
+}
+
+#define alt_for_each_insn(file, alt_grp, insn)			\
+	for (insn = alt_grp->first_insn; 			\
+	     insn;						\
+	     insn = next_insn_same_alt(file, alt_grp, insn))
+
+/*
+ * Provide a name for the type of alternatives present at the
+ * specified instruction.
+ *
+ * An instruction can have alternatives with different types, for
+ * example alternative instructions and an exception table. In that
+ * case the name for the alternative instructions type is used.
+ *
+ * Return NULL if the instruction has no alternative.
+ */
+const char *disas_alt_type_name(struct instruction *insn)
+{
+	struct alternative *alt;
+	const char *name;
+
+	name = NULL;
+	for (alt = insn->alts; alt; alt = alt->next) {
+		if (alt->type == ALT_TYPE_INSTRUCTIONS) {
+			name = "alternative";
+			break;
+		}
+
+		switch (alt->type) {
+		case ALT_TYPE_EX_TABLE:
+			name = "ex_table";
+			break;
+		case ALT_TYPE_JUMP_TABLE:
+			name = "jump_table";
+			break;
+		default:
+			name = "unknown";
+			break;
+		}
+	}
+
+	return name;
+}
+
+/*
+ * Provide a name for an alternative.
+ */
+char *disas_alt_name(struct alternative *alt)
+{
+	char pfx[4] = { 0 };
+	char *str = NULL;
+	const char *name;
+	int feature;
+	int flags;
+	int num;
+
+	switch (alt->type) {
+
+	case ALT_TYPE_EX_TABLE:
+		str = strdup("EXCEPTION");
+		break;
+
+	case ALT_TYPE_JUMP_TABLE:
+		str = strdup("JUMP");
+		break;
+
+	case ALT_TYPE_INSTRUCTIONS:
+		/*
+		 * This is a non-default group alternative. Create a name
+		 * based on the feature and flags associated with this
+		 * alternative. Use either the feature name (if it is available)
+		 * or the feature number. And add a prefix to show the flags
+		 * used.
+		 *
+		 * Prefix flags characters:
+		 *
+		 *   '!'  alternative used when feature not enabled
+		 *   '+'  direct call alternative
+		 *   '?'  unknown flag
+		 */
+
+		if (!alt->insn->alt_group)
+			return NULL;
+
+		feature = alt->insn->alt_group->feature;
+		num = alt_feature(feature);
+		flags = alt_flags(feature);
+		str = pfx;
+
+		if (flags & ~(ALT_FLAG_NOT | ALT_FLAG_DIRECT_CALL))
+			*str++ = '?';
+		if (flags & ALT_FLAG_DIRECT_CALL)
+			*str++ = '+';
+		if (flags & ALT_FLAG_NOT)
+			*str++ = '!';
+
+		name = arch_cpu_feature_name(num);
+		if (!name)
+			str = strfmt("%sFEATURE 0x%X", pfx, num);
+		else
+			str = strfmt("%s%s", pfx, name);
+
+		break;
+	}
+
+	return str;
+}
+
+/*
+ * Initialize an alternative. The default alternative should be initialized
+ * with alt=NULL.
+ */
+static int disas_alt_init(struct disas_alt *dalt,
+			  struct instruction *orig_insn,
+			  struct alternative *alt)
+{
+	dalt->orig_insn = orig_insn;
+	dalt->alt = alt;
+	dalt->insn_idx = 0;
+	dalt->name = alt ? disas_alt_name(alt) : strdup("DEFAULT");
+	if (!dalt->name)
+		return -1;
+	dalt->width = strlen(dalt->name);
+
+	return 0;
+}
+
+static int disas_alt_add_insn(struct disas_alt *dalt, int index, char *insn_str,
+			      int offset, int nops)
+{
+	int len;
+
+	if (index >= DISAS_ALT_INSN_MAX) {
+		WARN("Alternative %lx.%s has more instructions than supported",
+		     DALT_ALTID(dalt), dalt->name);
+		return -1;
+	}
+
+	len = strlen(insn_str);
+	dalt->insn[index].str = insn_str;
+	dalt->insn[index].offset = offset;
+	dalt->insn[index].nops = nops;
+	if (len > dalt->width)
+		dalt->width = len;
+
+	return 0;
+}
+
+static int disas_alt_jump(struct disas_alt *dalt)
+{
+	struct instruction *orig_insn;
+	struct instruction *dest_insn;
+	char suffix[2] = { 0 };
+	char *str;
+	int nops;
+
+	orig_insn = dalt->orig_insn;
+	dest_insn = dalt->alt->insn;
+
+	if (orig_insn->type == INSN_NOP) {
+		if (orig_insn->len == 5)
+			suffix[0] = 'q';
+		str = strfmt("jmp%-3s %lx <%s+0x%lx>", suffix,
+			     dest_insn->offset, dest_insn->sym->name,
+			     dest_insn->offset - dest_insn->sym->offset);
+		nops = 0;
+	} else {
+		str = strfmt("nop%d", orig_insn->len);
+		nops = orig_insn->len;
+	}
+
+	if (!str)
+		return -1;
+
+	disas_alt_add_insn(dalt, 0, str, 0, nops);
+
+	return 1;
+}
+
+/*
+ * Disassemble an exception table alternative.
+ */
+static int disas_alt_extable(struct disas_alt *dalt)
+{
+	struct instruction *alt_insn;
+	char *str;
+
+	alt_insn = dalt->alt->insn;
+	str = strfmt("resume at 0x%lx <%s+0x%lx>",
+		     alt_insn->offset, alt_insn->sym->name,
+		     alt_insn->offset - alt_insn->sym->offset);
+	if (!str)
+		return -1;
+
+	disas_alt_add_insn(dalt, 0, str, 0, 0);
+
+	return 1;
+}
+
+/*
+ * Disassemble an alternative and store its instructions in dalt. Return the
+ * number of instructions stored (capped by what dalt can hold), or -1 if an
+ * instruction string cannot be allocated.
+ */
+static int disas_alt_group(struct disas_context *dctx, struct disas_alt *dalt)
+{
+	struct objtool_file *file;
+	struct instruction *insn;
+	int offset;
+	char *str;
+	int count;
+	int nops;
+	int err;
+
+	file = dctx->file;
+	count = 0;
+	offset = 0;
+
+	alt_for_each_insn(file, DALT_GROUP(dalt), insn) {
+		disas_insn_alt(dctx, insn);
+		str = strdup(disas_result(dctx));
+		if (!str)
+			return -1;
+		nops = insn->type == INSN_NOP ? insn->len : 0;
+		err = disas_alt_add_insn(dalt, count, str, offset, nops);
+		if (err) {
+			free(str);	/* rejected, so not stored in dalt */
+			break;
+		}
+		offset += insn->len;
+		count++;
+	}
+
+	return count;
+}
+
+/*
+ * Disassemble the default alternative.
+ */
+static int disas_alt_default(struct disas_context *dctx, struct disas_alt *dalt)
+{
+	char *str;
+	int nops;
+	int err;
+
+	if (DALT_GROUP(dalt))
+		return disas_alt_group(dctx, dalt);
+
+	/*
+	 * Default alternative with no alt_group: this is the default
+	 * code associated with either a jump table or an exception
+	 * table and no other instruction alternatives. In that case
+	 * the default alternative is made of a single instruction.
+	 */
+	disas_insn(dctx, dalt->orig_insn);
+	str = strdup(disas_result(dctx));
+	if (!str)
+		return -1;
+	nops = dalt->orig_insn->type == INSN_NOP ? dalt->orig_insn->len : 0;
+	err = disas_alt_add_insn(dalt, 0, str, 0, nops);
+	if (err)
+		return -1;
+
+	return 1;
+}
+
+/*
+ * For each alternative, if there is an instruction at the specified
+ * offset then print this instruction, otherwise print a blank entry.
+ * The offset is an offset from the start of the alternative.
+ *
+ * Return the offset for the next instructions to print, or -1 if all
+ * instructions have been printed.
+ */
+static int disas_alt_print_insn(struct disas_alt *dalts, int alt_count,
+				int insn_count, int offset)
+{
+	struct disas_alt *dalt;
+	int offset_next;
+	char *str;
+	int i, j;
+
+	offset_next = -1;
+
+	for (i = 0; i < alt_count; i++) {
+		dalt = &dalts[i];
+		j = dalt->insn_idx;
+		if (j == -1) {
+			printf("| %-*s ", dalt->width, "");
+			continue;
+		}
+
+		if (dalt->insn[j].offset == offset) {
+			str = dalt->insn[j].str;
+			printf("| %-*s ", dalt->width, str ?: "");
+			if (++j < insn_count) {
+				dalt->insn_idx = j;
+			} else {
+				dalt->insn_idx = -1;
+				continue;
+			}
+		} else {
+			printf("| %-*s ", dalt->width, "");
+		}
+
+		if (dalt->insn[j].offset > 0 &&
+		    (offset_next == -1 ||
+		     (dalt->insn[j].offset < offset_next)))
+			offset_next = dalt->insn[j].offset;
+	}
+	printf("\n");
+
+	return offset_next;
+}
+
+/*
+ * Print all alternatives side-by-side.
+ */
+static void disas_alt_print_wide(char *alt_name, struct disas_alt *dalts, int alt_count,
+				 int insn_count)
+{
+	struct instruction *orig_insn;
+	int offset_next;
+	int offset;
+	int i;
+
+	orig_insn = dalts[0].orig_insn;
+
+	/*
+	 * Print a header with the name of each alternative.
+	 */
+	disas_print_info(stdout, orig_insn, -2, NULL);
+
+	if (strlen(alt_name) > dalts[0].width)
+		dalts[0].width = strlen(alt_name);
+	printf("| %-*s ", dalts[0].width, alt_name);
+
+	for (i = 1; i < alt_count; i++)
+		printf("| %-*s ", dalts[i].width, dalts[i].name);
+
+	printf("\n");
+
+	/*
+	 * Print instructions for each alternative.
+	 */
+	offset_next = 0;
+	do {
+		offset = offset_next;
+		disas_print(stdout, orig_insn->sec, orig_insn->offset + offset,
+			    -2, NULL);
+		offset_next = disas_alt_print_insn(dalts, alt_count, insn_count,
+						   offset);
+	} while (offset_next > offset);
+}
+
+/*
+ * Print all alternatives one above the other.
+ */
+static void disas_alt_print_compact(char *alt_name, struct disas_alt *dalts,
+				    int alt_count, int insn_count)
+{
+	struct instruction *orig_insn;
+	int width;
+	int i, j;
+	int len;
+
+	orig_insn = dalts[0].orig_insn;
+
+	len = disas_print(stdout, orig_insn->sec, orig_insn->offset, 0, NULL);
+	printf("%s\n", alt_name);
+
+	/*
+	 * If all alternatives have a single instruction then print each
+	 * alternative on a single line. Otherwise, print alternatives
+	 * one above the other with a clear separation.
+	 */
+
+	if (insn_count == 1) {
+		width = 0;
+		for (i = 0; i < alt_count; i++) {
+			if (dalts[i].width > width)
+				width = dalts[i].width;
+		}
+
+		for (i = 0; i < alt_count; i++) {
+			printf("%*s= %-*s    (if %s)\n", len, "", width,
+			       dalts[i].insn[0].str, dalts[i].name);
+		}
+
+		return;
+	}
+
+	for (i = 0; i < alt_count; i++) {
+		printf("%*s= %s\n", len, "", dalts[i].name);
+		for (j = 0; j < insn_count; j++) {
+			if (!dalts[i].insn[j].str)
+				break;
+			disas_print(stdout, orig_insn->sec,
+				    orig_insn->offset + dalts[i].insn[j].offset, 0,
+				    "| %s\n", dalts[i].insn[j].str);
+		}
+		printf("%*s|\n", len, "");
+	}
+}
+
+/*
+ * Trim NOPs in alternatives. This replaces trailing NOPs in alternatives
+ * with a single indication of the number of bytes covered with NOPs.
+ *
+ * Return the maximum number of instructions in all alternatives after
+ * trailing NOPs have been trimmed.
+ */
+static int disas_alt_trim_nops(struct disas_alt *dalts, int alt_count,
+			       int insn_count)
+{
+	struct disas_alt *dalt;
+	int nops_count;
+	const char *s;
+	int offset;
+	int count;
+	int nops;
+	int i, j;
+
+	count = 0;
+	for (i = 0; i < alt_count; i++) {
+		offset = 0;
+		nops = 0;
+		nops_count = 0;
+		dalt = &dalts[i];
+		for (j = insn_count - 1; j >= 0; j--) {
+			if (!dalt->insn[j].str || !dalt->insn[j].nops)
+				break;
+			offset = dalt->insn[j].offset;
+			free(dalt->insn[j].str);
+			dalt->insn[j].offset = 0;
+			dalt->insn[j].str = NULL;
+			nops += dalt->insn[j].nops;
+			nops_count++;
+		}
+
+		/*
+		 * All trailing NOPs have been removed. If there was a single
+		 * NOP instruction then re-add it. If there was a block of
+		 * NOPs then indicate the number of bytes that the block
+		 * covers (nop*<number-of-bytes>).
+		 */
+		if (nops_count) {
+			s = nops_count == 1 ? "" : "*";
+			dalt->insn[j + 1].str = strfmt("nop%s%d", s, nops);
+			dalt->insn[j + 1].offset = offset;
+			dalt->insn[j + 1].nops = nops;
+			j++;
+		}
+
+		if (j > count)
+			count = j;
+	}
+
+	return count + 1;
+}
+
+/*
+ * Disassemble an alternative: the default code at orig_insn and each
+ * entry of orig_insn->alts, then print them (wide or compact layout).
+ *
+ * Return the last instruction in the default alternative so that
+ * disassembly can continue with the next instruction. Return NULL
+ * on error. Strings allocated along the way are freed before returning.
+ */
+static void *disas_alt(struct disas_context *dctx,
+		       struct instruction *orig_insn)
+{
+	struct disas_alt dalts[DISAS_ALT_MAX] = { 0 };
+	struct instruction *last_insn = NULL;
+	struct alternative *alt;
+	struct disas_alt *dalt;
+	int insn_count = 0;
+	int alt_count = 0;
+	char *alt_name;
+	int count;
+	int i, j;
+	int err;
+
+	alt_name = strfmt("<%s.%lx>", disas_alt_type_name(orig_insn),
+			  orig_insn->offset);
+	if (!alt_name) {
+		WARN("Failed to define name for alternative at instruction 0x%lx",
+		     orig_insn->offset);
+		goto done;
+	}
+
+	/* Initialize and disassemble the default alternative. */
+	err = disas_alt_init(&dalts[0], orig_insn, NULL);
+	if (err) {
+		WARN("%s: failed to initialize default alternative", alt_name);
+		goto done;
+	}
+
+	insn_count = disas_alt_default(dctx, &dalts[0]);
+	if (insn_count < 0) {
+		WARN("%s: failed to disassemble default alternative", alt_name);
+		goto done;
+	}
+
+	/* Initialize and disassemble all other alternatives. */
+	i = 1;
+	for (alt = orig_insn->alts; alt; alt = alt->next) {
+		if (i >= DISAS_ALT_MAX) {
+			WARN("%s has more alternatives than supported", alt_name);
+			break;
+		}
+
+		dalt = &dalts[i];
+		err = disas_alt_init(dalt, orig_insn, alt);
+		if (err) {
+			WARN("%s: failed to disassemble alternative", alt_name);
+			goto done;
+		}
+
+		count = -1;
+		switch (dalt->alt->type) {
+		case ALT_TYPE_INSTRUCTIONS:
+			count = disas_alt_group(dctx, dalt);
+			break;
+		case ALT_TYPE_EX_TABLE:
+			count = disas_alt_extable(dalt);
+			break;
+		case ALT_TYPE_JUMP_TABLE:
+			count = disas_alt_jump(dalt);
+			break;
+		}
+		if (count < 0) {
+			WARN("%s: failed to disassemble alternative %s",
+			     alt_name, dalt->name);
+			goto done;
+		}
+
+		insn_count = count > insn_count ? count : insn_count;
+		i++;
+	}
+	alt_count = i;
+
+	/* Print default and non-default alternatives, trailing NOPs trimmed. */
+	insn_count = disas_alt_trim_nops(dalts, alt_count, insn_count);
+
+	if (opts.wide)
+		disas_alt_print_wide(alt_name, dalts, alt_count, insn_count);
+	else
+		disas_alt_print_compact(alt_name, dalts, alt_count, insn_count);
+
+	last_insn = orig_insn->alt_group ? orig_insn->alt_group->last_insn :
+		orig_insn;
+
+done:
+	/*
+	 * Walk the full arrays rather than alt_count/insn_count entries:
+	 * on the error paths alt_count is still 0 and insn_count may be
+	 * negative, which would leak whatever was already allocated.
+	 * dalts[] is zero-initialized and disas_alt_trim_nops() NULLs what
+	 * it frees, so unused slots are NULL and free(NULL) is a no-op.
+	 */
+	for (i = 0; i < DISAS_ALT_MAX; i++) {
+		free(dalts[i].name);
+		for (j = 0; j < DISAS_ALT_INSN_MAX; j++)
+			free(dalts[i].insn[j].str);
+	}
+	free(alt_name);
+
+	return last_insn;
+}
+
+/*
+ * Disassemble a function.
+ */
+static void disas_func(struct disas_context *dctx, struct symbol *func)
+{
+	struct instruction *insn_start;
+	struct instruction *insn;
+
+	printf("%s:\n", func->name);
+	sym_for_each_insn(dctx->file, func, insn) {
+		if (insn->alts) {
+			insn_start = insn;
+			insn = disas_alt(dctx, insn);
+			if (insn)
+				continue;
+			/*
+			 * There was an error with disassembling
+			 * the alternative. Resume disassembling
+			 * at the current instruction, this will
+			 * disassemble the default alternative
+			 * only and continue with the code after
+			 * the alternative.
+			 */
+			insn = insn_start;
+		}
+
+		DISAS_PRINSN(dctx, insn, 0);
+	}
+	printf("\n");
+}
+
+/*
+ * Disassemble all warned functions.
+ */
+void disas_warned_funcs(struct disas_context *dctx)
+{
+	struct symbol *sym;
+
+	if (!dctx)
+		return;
+
+	for_each_sym(dctx->file->elf, sym) {
+		if (sym->warned)
+			disas_func(dctx, sym);
+	}
+}
+
+/* Disassemble functions matching opts.disas ("*" = all). NULL @dctx: no-op. */
+void disas_funcs(struct disas_context *dctx)
+{
+	bool disas_all = !strcmp(opts.disas, "*");
+	struct section *sec;
+	struct symbol *sym;
+
+	/* nothing can be disassembled without a context, as in disas_warned_funcs() */
+	if (!dctx)
+		return;
+
+	for_each_sec(dctx->file->elf, sec) {
+		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+			continue;
+
+		sec_for_each_sym(sec, sym) {
+			/* verbose: a function with a warning was already disassembled */
+			if (opts.verbose && sym->warned)
+				continue;
+
+			if (disas_all || fnmatch(opts.disas, sym->name, 0) == 0)
+				disas_func(dctx, sym);
+		}
+	}
+}
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index ca5d77db692a..6a8ed9c62323 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -16,12 +16,17 @@
 #include <string.h>
 #include <unistd.h>
 #include <errno.h>
+#include <libgen.h>
+#include <ctype.h>
 #include <linux/interval_tree_generic.h>
 #include <objtool/builtin.h>
-
 #include <objtool/elf.h>
 #include <objtool/warn.h>
 
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_UP_POW2(x) (1U << ((8 * sizeof(x)) - __builtin_clz((x) - 1U)))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
 static inline u32 str_hash(const char *str)
 {
 	return jhash(str, strlen(str), 0);
@@ -92,11 +97,12 @@ static inline unsigned long __sym_start(struct symbol *s)
 
 static inline unsigned long __sym_last(struct symbol *s)
 {
-	return s->offset + s->len - 1;
+	return s->offset + (s->len ? s->len - 1 : 0);
 }
 
 INTERVAL_TREE_DEFINE(struct symbol, node, unsigned long, __subtree_last,
-		     __sym_start, __sym_last, static, __sym)
+		     __sym_start, __sym_last, static inline __maybe_unused,
+		     __sym)
 
 #define __sym_for_each(_iter, _tree, _start, _end)			\
 	for (_iter = __sym_iter_first((_tree), (_start), (_end));	\
@@ -108,7 +114,7 @@ struct symbol_hole {
 };
 
 /*
- * Find !section symbol where @offset is after it.
+ * Find the last symbol before @offset.
  */
 static int symbol_hole_by_offset(const void *key, const struct rb_node *node)
 {
@@ -119,8 +125,7 @@ static int symbol_hole_by_offset(const void *key, const struct rb_node *node)
 		return -1;
 
 	if (sh->key >= s->offset + s->len) {
-		if (s->type != STT_SECTION)
-			sh->sym = s;
+		sh->sym = s;
 		return 1;
 	}
 
@@ -167,11 +172,11 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
 {
 	struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
-	struct symbol *iter;
+	struct symbol *sym;
 
-	__sym_for_each(iter, tree, offset, offset) {
-		if (iter->offset == offset && iter->type != STT_SECTION)
-			return iter;
+	__sym_for_each(sym, tree, offset, offset) {
+		if (sym->offset == offset && !is_sec_sym(sym))
+			return sym->alias;
 	}
 
 	return NULL;
@@ -180,11 +185,11 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
 struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
 {
 	struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
-	struct symbol *iter;
+	struct symbol *func;
 
-	__sym_for_each(iter, tree, offset, offset) {
-		if (iter->offset == offset && iter->type == STT_FUNC)
-			return iter;
+	__sym_for_each(func, tree, offset, offset) {
+		if (func->offset == offset && is_func_sym(func))
+			return func->alias;
 	}
 
 	return NULL;
@@ -193,14 +198,29 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
 struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset)
 {
 	struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
-	struct symbol *iter;
+	struct symbol *sym = NULL, *tmp;
 
-	__sym_for_each(iter, tree, offset, offset) {
-		if (iter->type != STT_SECTION)
-			return iter;
+	__sym_for_each(tmp, tree, offset, offset) {
+		if (tmp->len) {
+			if (!sym) {
+				sym = tmp;
+				continue;
+			}
+
+			if (sym->offset != tmp->offset || sym->len != tmp->len) {
+				/*
+				 * In the rare case of overlapping symbols,
+				 * pick the smaller one.
+				 *
+				 * TODO: outlaw overlapping symbols
+				 */
+				if (tmp->len < sym->len)
+					sym = tmp;
+			}
+		}
 	}
 
-	return NULL;
+	return sym ? sym->alias : NULL;
 }
 
 /*
@@ -246,11 +266,11 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset)
 struct symbol *find_func_containing(struct section *sec, unsigned long offset)
 {
 	struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
-	struct symbol *iter;
+	struct symbol *func;
 
-	__sym_for_each(iter, tree, offset, offset) {
-		if (iter->type == STT_FUNC)
-			return iter;
+	__sym_for_each(func, tree, offset, offset) {
+		if (is_func_sym(func))
+			return func->alias;
 	}
 
 	return NULL;
@@ -268,6 +288,35 @@ struct symbol *find_symbol_by_name(const struct elf *elf, const char *name)
 	return NULL;
 }
 
+/* Find local symbol with matching STT_FILE */
+static struct symbol *find_local_symbol_by_file_and_name(const struct elf *elf,
+							 struct symbol *file,
+							 const char *name)
+{
+	struct symbol *sym;
+
+	elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) {
+		if (sym->bind == STB_LOCAL && sym->file == file &&
+		    !strcmp(sym->name, name)) {
+			return sym;
+		}
+	}
+
+	return NULL;
+}
+
+struct symbol *find_global_symbol_by_name(const struct elf *elf, const char *name)
+{
+	struct symbol *sym;
+
+	elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) {
+		if (!strcmp(sym->name, name) && !is_local_sym(sym))
+			return sym;
+	}
+
+	return NULL;
+}
+
 struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
 				     unsigned long offset, unsigned int len)
 {
@@ -358,14 +407,14 @@ static int read_sections(struct elf *elf)
 			return -1;
 		}
 
-		if (sec->sh.sh_size != 0 && !is_dwarf_section(sec)) {
+		if (sec_size(sec) != 0 && !is_dwarf_section(sec)) {
 			sec->data = elf_getdata(s, NULL);
 			if (!sec->data) {
 				ERROR_ELF("elf_getdata");
 				return -1;
 			}
 			if (sec->data->d_off != 0 ||
-			    sec->data->d_size != sec->sh.sh_size) {
+			    sec->data->d_size != sec_size(sec)) {
 				ERROR("unexpected data attributes for %s", sec->name);
 				return -1;
 			}
@@ -393,7 +442,38 @@ static int read_sections(struct elf *elf)
 	return 0;
 }
 
-static void elf_add_symbol(struct elf *elf, struct symbol *sym)
+static const char *demangle_name(struct symbol *sym)
+{
+	char *str;
+
+	if (!is_local_sym(sym))
+		return sym->name;
+
+	if (!is_func_sym(sym) && !is_object_sym(sym))
+		return sym->name;
+
+	if (!strstarts(sym->name, "__UNIQUE_ID_") && !strchr(sym->name, '.'))
+		return sym->name;
+
+	str = strdup(sym->name);
+	if (!str) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+	/* Cut the trailing run of digits and dots: "foo.isra.0" -> "foo.isra" */
+	for (int i = strlen(str) - 1; i >= 0; i--) {
+		char c = str[i];
+		/* isdigit() needs an unsigned char value, not a plain char */
+		if (!isdigit((unsigned char)c) && c != '.') {
+			str[i + 1] = '\0';
+			break;
+		}
+	}
+
+	return str;
+}
+
+static int elf_add_symbol(struct elf *elf, struct symbol *sym)
 {
 	struct list_head *entry;
 	struct rb_node *pnode;
@@ -405,14 +485,15 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
 	sym->type = GELF_ST_TYPE(sym->sym.st_info);
 	sym->bind = GELF_ST_BIND(sym->sym.st_info);
 
-	if (sym->type == STT_FILE)
+	if (is_file_sym(sym))
 		elf->num_files++;
 
 	sym->offset = sym->sym.st_value;
 	sym->len = sym->sym.st_size;
 
 	__sym_for_each(iter, &sym->sec->symbol_tree, sym->offset, sym->offset) {
-		if (iter->offset == sym->offset && iter->type == sym->type)
+		if (!is_undef_sym(iter) && iter->offset == sym->offset &&
+		    iter->type == sym->type && iter->len == sym->len)
 			iter->alias = sym;
 	}
 
@@ -423,21 +504,44 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
 	else
 		entry = &sym->sec->symbol_list;
 	list_add(&sym->list, entry);
+
+	list_add_tail(&sym->global_list, &elf->symbols);
 	elf_hash_add(symbol, &sym->hash, sym->idx);
 	elf_hash_add(symbol_name, &sym->name_hash, str_hash(sym->name));
 
-	/*
-	 * Don't store empty STT_NOTYPE symbols in the rbtree.  They
-	 * can exist within a function, confusing the sorting.
-	 */
-	if (!sym->len)
-		__sym_remove(sym, &sym->sec->symbol_tree);
+	if (is_func_sym(sym) &&
+	    (strstarts(sym->name, "__pfx_") ||
+	     strstarts(sym->name, "__cfi_") ||
+	     strstarts(sym->name, "__pi___pfx_") ||
+	     strstarts(sym->name, "__pi___cfi_")))
+		sym->prefix = 1;
+
+	if (strstarts(sym->name, ".klp.sym"))
+		sym->klp = 1;
+
+	if (!sym->klp && !is_sec_sym(sym) && strstr(sym->name, ".cold")) {
+		sym->cold = 1;
+
+		/*
+		 * Clang doesn't mark cold subfunctions as STT_FUNC, which
+		 * breaks several objtool assumptions.  Fake it.
+		 */
+		sym->type = STT_FUNC;
+	}
+
+	sym->pfunc = sym->cfunc = sym;
+
+	sym->demangled_name = demangle_name(sym);
+	if (!sym->demangled_name)
+		return -1;
+
+	return 0;
 }
 
 static int read_symbols(struct elf *elf)
 {
 	struct section *symtab, *symtab_shndx, *sec;
-	struct symbol *sym, *pfunc;
+	struct symbol *sym, *pfunc, *file = NULL;
 	int symbols_nr, i;
 	char *coldstr;
 	Elf_Data *shndx_data = NULL;
@@ -469,6 +573,9 @@ static int read_symbols(struct elf *elf)
 		ERROR_GLIBC("calloc");
 		return -1;
 	}
+
+	INIT_LIST_HEAD(&elf->symbols);
+
 	for (i = 0; i < symbols_nr; i++) {
 		sym = &elf->symbol_data[i];
 
@@ -477,14 +584,14 @@ static int read_symbols(struct elf *elf)
 		if (!gelf_getsymshndx(symtab->data, shndx_data, i, &sym->sym,
 				      &shndx)) {
 			ERROR_ELF("gelf_getsymshndx");
-			goto err;
+			return -1;
 		}
 
 		sym->name = elf_strptr(elf->elf, symtab->sh.sh_link,
 				       sym->sym.st_name);
 		if (!sym->name) {
 			ERROR_ELF("elf_strptr");
-			goto err;
+			return -1;
 		}
 
 		if ((sym->sym.st_shndx > SHN_UNDEF &&
@@ -496,7 +603,7 @@ static int read_symbols(struct elf *elf)
 			sym->sec = find_section_by_index(elf, shndx);
 			if (!sym->sec) {
 				ERROR("couldn't find section for symbol %s", sym->name);
-				goto err;
+				return -1;
 			}
 			if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) {
 				sym->name = sym->sec->name;
@@ -505,7 +612,13 @@ static int read_symbols(struct elf *elf)
 		} else
 			sym->sec = find_section_by_index(elf, 0);
 
-		elf_add_symbol(elf, sym);
+		if (elf_add_symbol(elf, sym))
+			return -1;
+
+		if (sym->type == STT_FILE)
+			file = sym;
+		else if (sym->bind == STB_LOCAL)
+			sym->file = file;
 	}
 
 	if (opts.stats) {
@@ -518,18 +631,15 @@ static int read_symbols(struct elf *elf)
 		sec_for_each_sym(sec, sym) {
 			char *pname;
 			size_t pnamelen;
-			if (sym->type != STT_FUNC)
-				continue;
-
-			if (sym->pfunc == NULL)
-				sym->pfunc = sym;
 
-			if (sym->cfunc == NULL)
-				sym->cfunc = sym;
+			if (!sym->cold)
+				continue;
 
 			coldstr = strstr(sym->name, ".cold");
-			if (!coldstr)
-				continue;
+			if (!coldstr) {
+				ERROR("%s(): cold subfunction without \".cold\"?", sym->name);
+				return -1;
+			}
 
 			pnamelen = coldstr - sym->name;
 			pname = strndup(sym->name, pnamelen);
@@ -538,7 +648,9 @@ static int read_symbols(struct elf *elf)
 				return -1;
 			}
 
-			pfunc = find_symbol_by_name(elf, pname);
+			pfunc = find_local_symbol_by_file_and_name(elf, sym->file, pname);
+			if (!pfunc)
+				pfunc = find_global_symbol_by_name(elf, pname);
 			free(pname);
 
 			if (!pfunc) {
@@ -546,8 +658,9 @@ static int read_symbols(struct elf *elf)
 				return -1;
 			}
 
-			sym->pfunc = pfunc;
+			sym->pfunc = pfunc->alias;
 			pfunc->cfunc = sym;
+			pfunc->alias->cfunc = sym;
 
 			/*
 			 * Unfortunately, -fnoreorder-functions puts the child
@@ -566,10 +679,6 @@ static int read_symbols(struct elf *elf)
 	}
 
 	return 0;
-
-err:
-	free(sym);
-	return -1;
 }
 
 static int mark_group_syms(struct elf *elf)
@@ -583,7 +692,7 @@ static int mark_group_syms(struct elf *elf)
 		return -1;
 	}
 
-	list_for_each_entry(sec, &elf->sections, list) {
+	for_each_sec(elf, sec) {
 		if (sec->sh.sh_type == SHT_GROUP &&
 		    sec->sh.sh_link == symtab->idx) {
 			sym = find_symbol_by_index(elf, sec->sh.sh_info);
@@ -624,7 +733,7 @@ static int elf_update_sym_relocs(struct elf *elf, struct symbol *sym)
 static int elf_update_symbol(struct elf *elf, struct section *symtab,
 			     struct section *symtab_shndx, struct symbol *sym)
 {
-	Elf32_Word shndx = sym->sec ? sym->sec->idx : SHN_UNDEF;
+	Elf32_Word shndx;
 	Elf_Data *symtab_data = NULL, *shndx_data = NULL;
 	Elf64_Xword entsize = symtab->sh.sh_entsize;
 	int max_idx, idx = sym->idx;
@@ -632,8 +741,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
 	bool is_special_shndx = sym->sym.st_shndx >= SHN_LORESERVE &&
 				sym->sym.st_shndx != SHN_XINDEX;
 
-	if (is_special_shndx)
-		shndx = sym->sym.st_shndx;
+	shndx = is_special_shndx ? sym->sym.st_shndx : sym->sec->idx;
 
 	s = elf_getscn(elf->elf, symtab->idx);
 	if (!s) {
@@ -731,7 +839,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
 	}
 
 	/* setup extended section index magic and write the symbol */
-	if ((shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) || is_special_shndx) {
+	if (shndx < SHN_LORESERVE || is_special_shndx) {
 		sym->sym.st_shndx = shndx;
 		if (!shndx_data)
 			shndx = 0;
@@ -751,24 +859,58 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
 	return 0;
 }
 
-static struct symbol *
-__elf_create_symbol(struct elf *elf, struct symbol *sym)
+struct symbol *elf_create_symbol(struct elf *elf, const char *name,
+				 struct section *sec, unsigned int bind,
+				 unsigned int type, unsigned long offset,
+				 size_t size)
 {
 	struct section *symtab, *symtab_shndx;
 	Elf32_Word first_non_local, new_idx;
-	struct symbol *old;
+	struct symbol *old, *sym;
 
-	symtab = find_section_by_name(elf, ".symtab");
-	if (symtab) {
-		symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+	sym = calloc(1, sizeof(*sym));
+	if (!sym) {
+		ERROR_GLIBC("calloc");
+		return NULL;
+	}
+
+	sym->name = strdup(name);
+	if (!sym->name) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+
+	if (type != STT_SECTION) {
+		sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
+		if (sym->sym.st_name == -1)
+			return NULL;
+	}
+
+	if (sec) {
+		sym->sec = sec;
 	} else {
+		sym->sec = find_section_by_index(elf, 0);
+		if (!sym->sec) {
+			ERROR("no NULL section");
+			return NULL;
+		}
+	}
+
+	sym->sym.st_info  = GELF_ST_INFO(bind, type);
+	sym->sym.st_value = offset;
+	sym->sym.st_size  = size;
+
+	symtab = find_section_by_name(elf, ".symtab");
+	if (!symtab) {
 		ERROR("no .symtab");
 		return NULL;
 	}
 
+	symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+
 	new_idx = sec_num_entries(symtab);
 
-	if (GELF_ST_BIND(sym->sym.st_info) != STB_LOCAL)
+	if (bind != STB_LOCAL)
 		goto non_local;
 
 	/*
@@ -806,10 +948,8 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
 
 non_local:
 	sym->idx = new_idx;
-	if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) {
-		ERROR("elf_update_symbol");
+	if (sym->idx && elf_update_symbol(elf, symtab, symtab_shndx, sym))
 		return NULL;
-	}
 
 	symtab->sh.sh_size += symtab->sh.sh_entsize;
 	mark_sec_changed(elf, symtab, true);
@@ -819,70 +959,28 @@ non_local:
 		mark_sec_changed(elf, symtab_shndx, true);
 	}
 
-	return sym;
-}
-
-static struct symbol *
-elf_create_section_symbol(struct elf *elf, struct section *sec)
-{
-	struct symbol *sym = calloc(1, sizeof(*sym));
-
-	if (!sym) {
-		ERROR_GLIBC("malloc");
+	if (elf_add_symbol(elf, sym))
 		return NULL;
-	}
-
-	sym->name = sec->name;
-	sym->sec = sec;
-
-	// st_name 0
-	sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
-	// st_other 0
-	// st_value 0
-	// st_size 0
-
-	sym = __elf_create_symbol(elf, sym);
-	if (sym)
-		elf_add_symbol(elf, sym);
 
 	return sym;
 }
 
-static int elf_add_string(struct elf *elf, struct section *strtab, char *str);
-
-struct symbol *
-elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size)
+struct symbol *elf_create_section_symbol(struct elf *elf, struct section *sec)
 {
-	struct symbol *sym = calloc(1, sizeof(*sym));
-	size_t namelen = strlen(orig->name) + sizeof("__pfx_");
-	char *name = malloc(namelen);
+	struct symbol *sym;
 
-	if (!sym || !name) {
-		ERROR_GLIBC("malloc");
+	sym = elf_create_symbol(elf, sec->name, sec, STB_LOCAL, STT_SECTION, 0, 0);
+	if (!sym)
 		return NULL;
-	}
 
-	snprintf(name, namelen, "__pfx_%s", orig->name);
-
-	sym->name = name;
-	sym->sec = orig->sec;
-
-	sym->sym.st_name = elf_add_string(elf, NULL, name);
-	sym->sym.st_info = orig->sym.st_info;
-	sym->sym.st_value = orig->sym.st_value - size;
-	sym->sym.st_size = size;
-
-	sym = __elf_create_symbol(elf, sym);
-	if (sym)
-		elf_add_symbol(elf, sym);
+	sec->sym = sym;
 
 	return sym;
 }
 
-static struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
-				    unsigned int reloc_idx,
-				    unsigned long offset, struct symbol *sym,
-				    s64 addend, unsigned int type)
+struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
+			     unsigned int reloc_idx, unsigned long offset,
+			     struct symbol *sym, s64 addend, unsigned int type)
 {
 	struct reloc *reloc, empty = { 0 };
 
@@ -922,9 +1020,9 @@ struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
 				      unsigned long insn_off)
 {
 	struct symbol *sym = insn_sec->sym;
-	int addend = insn_off;
+	s64 addend = insn_off;
 
-	if (!(insn_sec->sh.sh_flags & SHF_EXECINSTR)) {
+	if (!is_text_sec(insn_sec)) {
 		ERROR("bad call to %s() for data symbol %s", __func__, sym->name);
 		return NULL;
 	}
@@ -939,8 +1037,6 @@ struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
 		sym = elf_create_section_symbol(elf, insn_sec);
 		if (!sym)
 			return NULL;
-
-		insn_sec->sym = sym;
 	}
 
 	return elf_init_reloc(elf, sec->rsec, reloc_idx, offset, sym, addend,
@@ -953,7 +1049,7 @@ struct reloc *elf_init_reloc_data_sym(struct elf *elf, struct section *sec,
 				      struct symbol *sym,
 				      s64 addend)
 {
-	if (sym->sec && (sec->sh.sh_flags & SHF_EXECINSTR)) {
+	if (is_text_sec(sec)) {
 		ERROR("bad call to %s() for text symbol %s", __func__, sym->name);
 		return NULL;
 	}
@@ -986,12 +1082,16 @@ static int read_relocs(struct elf *elf)
 
 		rsec->base->rsec = rsec;
 
-		nr_reloc = 0;
+		/* nr_alloc_relocs=0: libelf owns d_buf */
+		rsec->nr_alloc_relocs = 0;
+
 		rsec->relocs = calloc(sec_num_entries(rsec), sizeof(*reloc));
 		if (!rsec->relocs) {
 			ERROR_GLIBC("calloc");
 			return -1;
 		}
+
+		nr_reloc = 0;
 		for (i = 0; i < sec_num_entries(rsec); i++) {
 			reloc = &rsec->relocs[i];
 
@@ -1044,6 +1144,12 @@ struct elf *elf_open_read(const char *name, int flags)
 		goto err;
 	}
 
+	elf->name = strdup(name);
+	if (!elf->name) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+
 	if ((flags & O_ACCMODE) == O_RDONLY)
 		cmd = ELF_C_READ_MMAP;
 	else if ((flags & O_ACCMODE) == O_RDWR)
@@ -1081,11 +1187,142 @@ err:
 	return NULL;
 }
 
-static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
+struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name)
 {
-	Elf_Data *data;
-	Elf_Scn *s;
-	int len;
+	struct section *null, *symtab, *strtab, *shstrtab;
+	char *dir, *base, *tmp_name;
+	struct symbol *sym;
+	struct elf *elf;
+
+	elf_version(EV_CURRENT);
+
+	elf = calloc(1, sizeof(*elf));
+	if (!elf) {
+		ERROR_GLIBC("calloc");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&elf->sections);
+
+	dir = strdup(name);
+	if (!dir) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+
+	dir = dirname(dir);
+
+	base = strdup(name);
+	if (!base) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+
+	base = basename(base);
+
+	tmp_name = malloc(256);
+	if (!tmp_name) {
+		ERROR_GLIBC("malloc");
+		return NULL;
+	}
+
+	snprintf(tmp_name, 256, "%s/%s.XXXXXX", dir, base);
+
+	elf->fd = mkstemp(tmp_name);
+	if (elf->fd == -1) {
+		ERROR_GLIBC("can't create tmp file");
+		exit(1);
+	}
+
+	elf->tmp_name = tmp_name;
+
+	elf->name = strdup(name);
+	if (!elf->name) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+
+	elf->elf = elf_begin(elf->fd, ELF_C_WRITE, NULL);
+	if (!elf->elf) {
+		ERROR_ELF("elf_begin");
+		return NULL;
+	}
+
+	if (!gelf_newehdr(elf->elf, ELFCLASS64)) {
+		ERROR_ELF("gelf_newehdr");
+		return NULL;
+	}
+
+	memcpy(&elf->ehdr, ehdr, sizeof(elf->ehdr));
+
+	if (!gelf_update_ehdr(elf->elf, &elf->ehdr)) {
+		ERROR_ELF("gelf_update_ehdr");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&elf->symbols);
+
+	if (!elf_alloc_hash(section,		1000) ||
+	    !elf_alloc_hash(section_name,	1000) ||
+	    !elf_alloc_hash(symbol,		10000) ||
+	    !elf_alloc_hash(symbol_name,	10000) ||
+	    !elf_alloc_hash(reloc,		100000))
+		return NULL;
+
+	null		= elf_create_section(elf, NULL, 0, 0, SHT_NULL, 0, 0);
+	shstrtab	= elf_create_section(elf, NULL, 0, 0, SHT_STRTAB, 1, 0);
+	strtab		= elf_create_section(elf, NULL, 0, 0, SHT_STRTAB, 1, 0);
+
+	if (!null || !shstrtab || !strtab)
+		return NULL;
+
+	null->name	= "";
+	shstrtab->name	= ".shstrtab";
+	strtab->name	= ".strtab";
+
+	null->sh.sh_name	= elf_add_string(elf, shstrtab, null->name);
+	shstrtab->sh.sh_name	= elf_add_string(elf, shstrtab, shstrtab->name);
+	strtab->sh.sh_name	= elf_add_string(elf, shstrtab, strtab->name);
+
+	if (null->sh.sh_name == -1 || shstrtab->sh.sh_name == -1 || strtab->sh.sh_name == -1)
+		return NULL;
+
+	elf_hash_add(section_name, &null->name_hash,		str_hash(null->name));
+	elf_hash_add(section_name, &strtab->name_hash,		str_hash(strtab->name));
+	elf_hash_add(section_name, &shstrtab->name_hash,	str_hash(shstrtab->name));
+
+	if (elf_add_string(elf, strtab, "") == -1)
+		return NULL;
+
+	symtab = elf_create_section(elf, ".symtab", 0x18, 0x18, SHT_SYMTAB, 0x8, 0);
+	if (!symtab)
+		return NULL;
+
+	symtab->sh.sh_link = strtab->idx;
+	symtab->sh.sh_info = 1;
+
+	elf->ehdr.e_shstrndx = shstrtab->idx;
+	if (!gelf_update_ehdr(elf->elf, &elf->ehdr)) {
+		ERROR_ELF("gelf_update_ehdr");
+		return NULL;
+	}
+
+	sym = calloc(1, sizeof(*sym));
+	if (!sym) {
+		ERROR_GLIBC("calloc");
+		return NULL;
+	}
+
+	sym->name = "";
+	sym->sec = null;
+	elf_add_symbol(elf, sym);
+
+	return elf;
+}
+
+unsigned int elf_add_string(struct elf *elf, struct section *strtab, const char *str)
+{
+	unsigned int offset;
 
 	if (!strtab)
 		strtab = find_section_by_name(elf, ".strtab");
@@ -1094,76 +1331,109 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
 		return -1;
 	}
 
-	s = elf_getscn(elf->elf, strtab->idx);
+	if (!strtab->sh.sh_addralign) {
+		ERROR("'%s': invalid sh_addralign", strtab->name);
+		return -1;
+	}
+
+	offset = ALIGN_UP(strtab->sh.sh_size, strtab->sh.sh_addralign);
+
+	if (!elf_add_data(elf, strtab, str, strlen(str) + 1))
+		return -1;
+
+	return offset;
+}
+
+void *elf_add_data(struct elf *elf, struct section *sec, const void *data, size_t size)
+{
+	unsigned long offset;
+	Elf_Scn *s;
+
+	if (!sec->sh.sh_addralign) {
+		ERROR("'%s': invalid sh_addralign", sec->name);
+		return NULL;
+	}
+
+	s = elf_getscn(elf->elf, sec->idx);
 	if (!s) {
 		ERROR_ELF("elf_getscn");
-		return -1;
+		return NULL;
 	}
 
-	data = elf_newdata(s);
-	if (!data) {
+	sec->data = elf_newdata(s);
+	if (!sec->data) {
 		ERROR_ELF("elf_newdata");
-		return -1;
+		return NULL;
 	}
 
-	data->d_buf = str;
-	data->d_size = strlen(str) + 1;
-	data->d_align = 1;
+	sec->data->d_buf = calloc(1, size);
+	if (!sec->data->d_buf) {
+		ERROR_GLIBC("calloc");
+		return NULL;
+	}
 
-	len = strtab->sh.sh_size;
-	strtab->sh.sh_size += data->d_size;
+	if (data)
+		memcpy(sec->data->d_buf, data, size);
 
-	mark_sec_changed(elf, strtab, true);
+	sec->data->d_size = size;
+	sec->data->d_align = 1;
 
-	return len;
+	offset = ALIGN_UP(sec->sh.sh_size, sec->sh.sh_addralign);
+	sec->sh.sh_size = offset + size;
+
+	mark_sec_changed(elf, sec, true);
+
+	return sec->data->d_buf;
 }
 
 struct section *elf_create_section(struct elf *elf, const char *name,
-				   size_t entsize, unsigned int nr)
+				   size_t size, size_t entsize,
+				   unsigned int type, unsigned int align,
+				   unsigned int flags)
 {
 	struct section *sec, *shstrtab;
-	size_t size = entsize * nr;
 	Elf_Scn *s;
 
-	sec = malloc(sizeof(*sec));
+	if (name && find_section_by_name(elf, name)) {
+		ERROR("section '%s' already exists", name);
+		return NULL;
+	}
+
+	sec = calloc(1, sizeof(*sec));
 	if (!sec) {
-		ERROR_GLIBC("malloc");
+		ERROR_GLIBC("calloc");
 		return NULL;
 	}
-	memset(sec, 0, sizeof(*sec));
 
 	INIT_LIST_HEAD(&sec->symbol_list);
 
+	/* don't actually create the section, just the data structures */
+	if (type == SHT_NULL)
+		goto add;
+
 	s = elf_newscn(elf->elf);
 	if (!s) {
 		ERROR_ELF("elf_newscn");
 		return NULL;
 	}
 
-	sec->name = strdup(name);
-	if (!sec->name) {
-		ERROR_GLIBC("strdup");
-		return NULL;
-	}
-
 	sec->idx = elf_ndxscn(s);
 
-	sec->data = elf_newdata(s);
-	if (!sec->data) {
-		ERROR_ELF("elf_newdata");
-		return NULL;
-	}
+	if (size) {
+		sec->data = elf_newdata(s);
+		if (!sec->data) {
+			ERROR_ELF("elf_newdata");
+			return NULL;
+		}
 
-	sec->data->d_size = size;
-	sec->data->d_align = 1;
+		sec->data->d_size = size;
+		sec->data->d_align = 1;
 
-	if (size) {
-		sec->data->d_buf = malloc(size);
+		sec->data->d_buf = calloc(1, size);
 		if (!sec->data->d_buf) {
-			ERROR_GLIBC("malloc");
+			ERROR_GLIBC("calloc");
 			return NULL;
 		}
-		memset(sec->data->d_buf, 0, size);
 	}
 
 	if (!gelf_getshdr(s, &sec->sh)) {
@@ -1173,34 +1443,152 @@ struct section *elf_create_section(struct elf *elf, const char *name,
 
 	sec->sh.sh_size = size;
 	sec->sh.sh_entsize = entsize;
-	sec->sh.sh_type = SHT_PROGBITS;
-	sec->sh.sh_addralign = 1;
-	sec->sh.sh_flags = SHF_ALLOC;
-
-	/* Add section name to .shstrtab (or .strtab for Clang) */
-	shstrtab = find_section_by_name(elf, ".shstrtab");
-	if (!shstrtab)
-		shstrtab = find_section_by_name(elf, ".strtab");
-	if (!shstrtab) {
-		ERROR("can't find .shstrtab or .strtab section");
-		return NULL;
+	sec->sh.sh_type = type;
+	sec->sh.sh_addralign = align;
+	sec->sh.sh_flags = flags;
+
+	if (name) {
+		sec->name = strdup(name);
+		if (!sec->name) {
+			ERROR("strdup");
+			return NULL;
+		}
+
+		/* Add section name to .shstrtab (or .strtab for Clang) */
+		shstrtab = find_section_by_name(elf, ".shstrtab");
+		if (!shstrtab) {
+			shstrtab = find_section_by_name(elf, ".strtab");
+			if (!shstrtab) {
+				ERROR("can't find .shstrtab or .strtab");
+				return NULL;
+			}
+		}
+		sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
+		if (sec->sh.sh_name == -1)
+			return NULL;
+
+		elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
 	}
-	sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
-	if (sec->sh.sh_name == -1)
-		return NULL;
 
+add:
 	list_add_tail(&sec->list, &elf->sections);
 	elf_hash_add(section, &sec->hash, sec->idx);
-	elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
 
 	mark_sec_changed(elf, sec, true);
 
 	return sec;
 }
 
-static struct section *elf_create_rela_section(struct elf *elf,
-					       struct section *sec,
-					       unsigned int reloc_nr)
+static int elf_alloc_reloc(struct elf *elf, struct section *rsec)
+{
+	struct reloc *old_relocs, *old_relocs_end, *new_relocs;
+	unsigned int nr_relocs_old = sec_num_entries(rsec);
+	unsigned int nr_relocs_new = nr_relocs_old + 1;
+	unsigned long nr_alloc;
+	struct symbol *sym;
+
+	if (!rsec->data) {
+		rsec->data = elf_newdata(elf_getscn(elf->elf, rsec->idx));
+		if (!rsec->data) {
+			ERROR_ELF("elf_newdata");
+			return -1;
+		}
+
+		rsec->data->d_align = 1;
+		rsec->data->d_type = ELF_T_RELA;
+		rsec->data->d_buf = NULL;
+	}
+
+	rsec->data->d_size = nr_relocs_new * elf_rela_size(elf);
+	rsec->sh.sh_size   = rsec->data->d_size;
+
+	nr_alloc = MAX(64, ALIGN_UP_POW2(nr_relocs_new));
+	if (nr_alloc <= rsec->nr_alloc_relocs)
+		return 0;
+
+	if (rsec->data->d_buf && !rsec->nr_alloc_relocs) {
+		void *orig_buf = rsec->data->d_buf;
+
+		/*
+		 * The original d_buf is owned by libelf so it can't be
+		 * realloced.
+		 */
+		rsec->data->d_buf = malloc(nr_alloc * elf_rela_size(elf));
+		if (!rsec->data->d_buf) {
+			ERROR_GLIBC("malloc");
+			return -1;
+		}
+		memcpy(rsec->data->d_buf, orig_buf,
+		       nr_relocs_old * elf_rela_size(elf));
+	} else {
+		rsec->data->d_buf = realloc(rsec->data->d_buf,
+					    nr_alloc * elf_rela_size(elf));
+		if (!rsec->data->d_buf) {
+			ERROR_GLIBC("realloc");
+			return -1;
+		}
+	}
+
+	rsec->nr_alloc_relocs = nr_alloc;
+
+	old_relocs = rsec->relocs;
+	new_relocs = calloc(nr_alloc, sizeof(struct reloc));
+	if (!new_relocs) {
+		ERROR_GLIBC("calloc");
+		return -1;
+	}
+
+	if (!old_relocs)
+		goto done;
+
+	/*
+	 * The struct reloc's address has changed.  Update all the symbols and
+	 * relocs which reference it.
+	 */
+
+	old_relocs_end = &old_relocs[nr_relocs_old];
+	for_each_sym(elf, sym) {
+		struct reloc *reloc;
+
+		reloc = sym->relocs;
+		if (!reloc)
+			continue;
+
+		if (reloc >= old_relocs && reloc < old_relocs_end)
+			sym->relocs = &new_relocs[reloc - old_relocs];
+
+		while (1) {
+			struct reloc *next_reloc = sym_next_reloc(reloc);
+
+			if (!next_reloc)
+				break;
+
+			if (next_reloc >= old_relocs && next_reloc < old_relocs_end)
+				set_sym_next_reloc(reloc, &new_relocs[next_reloc - old_relocs]);
+
+			reloc = next_reloc;
+		}
+	}
+
+	memcpy(new_relocs, old_relocs, nr_relocs_old * sizeof(struct reloc));
+
+	for (int i = 0; i < nr_relocs_old; i++) {
+		struct reloc *old = &old_relocs[i];
+		struct reloc *new = &new_relocs[i];
+		u32 key = reloc_hash(old);
+
+		elf_hash_del(reloc, &old->hash, key);
+		elf_hash_add(reloc, &new->hash, key);
+	}
+
+	free(old_relocs);
+done:
+	rsec->relocs = new_relocs;
+	return 0;
+}
+
+struct section *elf_create_rela_section(struct elf *elf, struct section *sec,
+					unsigned int nr_relocs)
 {
 	struct section *rsec;
 	char *rsec_name;
@@ -1213,41 +1601,72 @@ static struct section *elf_create_rela_section(struct elf *elf,
 	strcpy(rsec_name, ".rela");
 	strcat(rsec_name, sec->name);
 
-	rsec = elf_create_section(elf, rsec_name, elf_rela_size(elf), reloc_nr);
+	rsec = elf_create_section(elf, rsec_name, nr_relocs * elf_rela_size(elf),
+				  elf_rela_size(elf), SHT_RELA, elf_addr_size(elf),
+				  SHF_INFO_LINK);
 	free(rsec_name);
 	if (!rsec)
 		return NULL;
 
-	rsec->data->d_type = ELF_T_RELA;
-	rsec->sh.sh_type = SHT_RELA;
-	rsec->sh.sh_addralign = elf_addr_size(elf);
-	rsec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
-	rsec->sh.sh_info = sec->idx;
-	rsec->sh.sh_flags = SHF_INFO_LINK;
+	if (nr_relocs) {
+		rsec->data->d_type = ELF_T_RELA;
 
-	rsec->relocs = calloc(sec_num_entries(rsec), sizeof(struct reloc));
-	if (!rsec->relocs) {
-		ERROR_GLIBC("calloc");
-		return NULL;
+		rsec->nr_alloc_relocs = nr_relocs;
+		rsec->relocs = calloc(nr_relocs, sizeof(struct reloc));
+		if (!rsec->relocs) {
+			ERROR_GLIBC("calloc");
+			return NULL;
+		}
 	}
 
+	rsec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
+	rsec->sh.sh_info = sec->idx;
+
 	sec->rsec = rsec;
 	rsec->base = sec;
 
 	return rsec;
 }
 
+struct reloc *elf_create_reloc(struct elf *elf, struct section *sec,
+			       unsigned long offset,
+			       struct symbol *sym, s64 addend,
+			       unsigned int type)
+{
+	struct section *rsec = sec->rsec;
+
+	if (!rsec) {
+		rsec = elf_create_rela_section(elf, sec, 0);
+		if (!rsec)
+			return NULL;
+	}
+
+	if (find_reloc_by_dest(elf, sec, offset)) {
+		ERROR_FUNC(sec, offset, "duplicate reloc");
+		return NULL;
+	}
+
+	if (elf_alloc_reloc(elf, rsec))
+		return NULL;
+
+	mark_sec_changed(elf, rsec, true);
+
+	return elf_init_reloc(elf, rsec, sec_num_entries(rsec) - 1, offset, sym,
+			      addend, type);
+}
+
 struct section *elf_create_section_pair(struct elf *elf, const char *name,
 					size_t entsize, unsigned int nr,
-					unsigned int reloc_nr)
+					unsigned int nr_relocs)
 {
 	struct section *sec;
 
-	sec = elf_create_section(elf, name, entsize, nr);
+	sec = elf_create_section(elf, name, nr * entsize, entsize,
+				 SHT_PROGBITS, 1, SHF_ALLOC);
 	if (!sec)
 		return NULL;
 
-	if (!elf_create_rela_section(elf, sec, reloc_nr))
+	if (!elf_create_rela_section(elf, sec, nr_relocs))
 		return NULL;
 
 	return sec;
@@ -1282,7 +1701,7 @@ int elf_write_insn(struct elf *elf, struct section *sec,
  */
 static int elf_truncate_section(struct elf *elf, struct section *sec)
 {
-	u64 size = sec->sh.sh_size;
+	u64 size = sec_size(sec);
 	bool truncated = false;
 	Elf_Data *data = NULL;
 	Elf_Scn *s;
@@ -1296,7 +1715,6 @@ static int elf_truncate_section(struct elf *elf, struct section *sec)
 	for (;;) {
 		/* get next data descriptor for the relevant section */
 		data = elf_getdata(s, data);
-
 		if (!data) {
 			if (size) {
 				ERROR("end of section data but non-zero size left\n");
@@ -1332,8 +1750,8 @@ int elf_write(struct elf *elf)
 
 	/* Update changed relocation sections and section headers: */
 	list_for_each_entry(sec, &elf->sections, list) {
-		if (sec->truncate)
-			elf_truncate_section(elf, sec);
+		if (sec->truncate && elf_truncate_section(elf, sec))
+			return -1;
 
 		if (sec_changed(sec)) {
 			s = elf_getscn(elf->elf, sec->idx);
@@ -1366,7 +1784,7 @@ int elf_write(struct elf *elf)
 	return 0;
 }
 
-void elf_close(struct elf *elf)
+int elf_close(struct elf *elf)
 {
 	if (elf->elf)
 		elf_end(elf->elf);
@@ -1374,8 +1792,12 @@ void elf_close(struct elf *elf)
 	if (elf->fd > 0)
 		close(elf->fd);
 
+	if (elf->tmp_name && rename(elf->tmp_name, elf->name))
+		return -1;
+
 	/*
 	 * NOTE: All remaining allocations are leaked on purpose.  Objtool is
 	 * about to exit anyway.
 	 */
+	return 0;
 }
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index be33c7b43180..8866158975fc 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -71,7 +71,7 @@ struct stack_op {
 
 struct instruction;
 
-int arch_ftrace_match(char *name);
+int arch_ftrace_match(const char *name);
 
 void arch_initial_func_cfi_state(struct cfi_init_state *state);
 
@@ -83,7 +83,8 @@ bool arch_callee_saved_reg(unsigned char reg);
 
 unsigned long arch_jump_destination(struct instruction *insn);
 
-unsigned long arch_dest_reloc_offset(int addend);
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc);
+u64 arch_adjusted_addend(struct reloc *reloc);
 
 const char *arch_nop_insn(int len);
 const char *arch_ret_insn(int len);
@@ -102,4 +103,15 @@ bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc);
 unsigned int arch_reloc_size(struct reloc *reloc);
 unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table);
 
+extern const char *arch_reg_name[CFI_NUM_REGS];
+
+#ifdef DISAS
+
+#include <bfd.h>
+#include <dis-asm.h>
+
+int arch_disas_info_init(struct disassemble_info *dinfo);
+
+#endif /* DISAS */
+
 #endif /* _ARCH_H */
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index ab22673862e1..b9e229ed4dc0 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -9,12 +9,15 @@
 
 struct opts {
 	/* actions: */
+	bool cfi;
+	bool checksum;
 	bool dump_orc;
 	bool hack_jump_label;
 	bool hack_noinstr;
 	bool hack_skylake;
 	bool ibt;
 	bool mcount;
+	bool noabs;
 	bool noinstr;
 	bool orc;
 	bool retpoline;
@@ -25,11 +28,12 @@ struct opts {
 	bool static_call;
 	bool uaccess;
 	int prefix;
-	bool cfi;
-	bool noabs;
+	const char *disas;
 
 	/* options: */
 	bool backtrace;
+	bool backup;
+	const char *debug_checksum;
 	bool dryrun;
 	bool link;
 	bool mnop;
@@ -38,8 +42,10 @@ struct opts {
 	const char *output;
 	bool sec_address;
 	bool stats;
+	const char *trace;
 	bool verbose;
 	bool werror;
+	bool wide;
 };
 
 extern struct opts opts;
@@ -48,6 +54,8 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
 
 int objtool_run(int argc, const char **argv);
 
-void print_args(void);
+int make_backup(void);
+
+int cmd_klp(int argc, const char **argv);
 
 #endif /* _BUILTIN_H */
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index 00fb745e7233..2e1346ad5e92 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -36,6 +36,19 @@ struct alt_group {
 	struct cfi_state **cfi;
 
 	bool ignore;
+	unsigned int feature;
+};
+
+enum alternative_type {
+	ALT_TYPE_INSTRUCTIONS,
+	ALT_TYPE_JUMP_TABLE,
+	ALT_TYPE_EX_TABLE,
+};
+
+struct alternative {
+	struct alternative *next;
+	struct instruction *insn;
+	enum alternative_type type;
 };
 
 #define INSN_CHUNK_BITS		8
@@ -64,8 +77,11 @@ struct instruction {
 	    noendbr		: 1,
 	    unret		: 1,
 	    visited		: 4,
-	    no_reloc		: 1;
-		/* 10 bit hole */
+	    no_reloc		: 1,
+	    hole		: 1,
+	    fake		: 1,
+	    trace		: 1;
+		/* 9 bit hole */
 
 	struct alt_group *alt_group;
 	struct instruction *jump_dest;
@@ -115,6 +131,15 @@ static inline bool is_jump(struct instruction *insn)
 	return is_static_jump(insn) || is_dynamic_jump(insn);
 }
 
+static inline struct symbol *insn_call_dest(struct instruction *insn)
+{
+	if (insn->type == INSN_JUMP_DYNAMIC ||
+	    insn->type == INSN_CALL_DYNAMIC)
+		return NULL;
+
+	return insn->_call_dest;
+}
+
 struct instruction *find_insn(struct objtool_file *file,
 			      struct section *sec, unsigned long offset);
 
@@ -125,4 +150,14 @@ struct instruction *next_insn_same_sec(struct objtool_file *file, struct instruc
 	     insn && insn->sec == _sec;					\
 	     insn = next_insn_same_sec(file, insn))
 
+#define sym_for_each_insn(file, sym, insn)				\
+	for (insn = find_insn(file, sym->sec, sym->offset);		\
+	     insn && insn->offset < sym->offset + sym->len;		\
+	     insn = next_insn_same_sec(file, insn))
+
+const char *objtool_disas_insn(struct instruction *insn);
+
+extern size_t sym_name_max_len;
+extern struct disas_context *objtool_disas_ctx;
+
 #endif /* _CHECK_H */
diff --git a/tools/objtool/include/objtool/checksum.h b/tools/objtool/include/objtool/checksum.h
new file mode 100644
index 000000000000..7fe21608722a
--- /dev/null
+++ b/tools/objtool/include/objtool/checksum.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _OBJTOOL_CHECKSUM_H
+#define _OBJTOOL_CHECKSUM_H
+
+#include <objtool/elf.h>
+
+#ifdef BUILD_KLP
+
+static inline void checksum_init(struct symbol *func)
+{
+	if (func && !func->csum.state) {
+		func->csum.state = XXH3_createState();
+		XXH3_64bits_reset(func->csum.state);
+	}
+}
+
+static inline void checksum_update(struct symbol *func,
+				   struct instruction *insn,
+				   const void *data, size_t size)
+{
+	XXH3_64bits_update(func->csum.state, data, size);
+	dbg_checksum(func, insn, XXH3_64bits_digest(func->csum.state));
+}
+
+static inline void checksum_finish(struct symbol *func)
+{
+	if (func && func->csum.state) {
+		func->csum.checksum = XXH3_64bits_digest(func->csum.state);
+		func->csum.state = NULL;
+	}
+}
+
+#else /* !BUILD_KLP */
+
+static inline void checksum_init(struct symbol *func) {}
+static inline void checksum_update(struct symbol *func,
+				   struct instruction *insn,
+				   const void *data, size_t size) {}
+static inline void checksum_finish(struct symbol *func) {}
+
+#endif /* !BUILD_KLP */
+
+#endif /* _OBJTOOL_CHECKSUM_H */
diff --git a/tools/objtool/include/objtool/checksum_types.h b/tools/objtool/include/objtool/checksum_types.h
new file mode 100644
index 000000000000..507efdd8ab5b
--- /dev/null
+++ b/tools/objtool/include/objtool/checksum_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _OBJTOOL_CHECKSUM_TYPES_H
+#define _OBJTOOL_CHECKSUM_TYPES_H
+
+struct sym_checksum {
+	u64 addr;
+	u64 checksum;
+};
+
+#ifdef BUILD_KLP
+
+#include <xxhash.h>
+
+struct checksum {
+	XXH3_state_t *state;
+	XXH64_hash_t checksum;
+};
+
+#else /* !BUILD_KLP */
+
+struct checksum {};
+
+#endif /* !BUILD_KLP */
+
+#endif /* _OBJTOOL_CHECKSUM_TYPES_H */
diff --git a/tools/objtool/include/objtool/disas.h b/tools/objtool/include/objtool/disas.h
new file mode 100644
index 000000000000..e8f395eff159
--- /dev/null
+++ b/tools/objtool/include/objtool/disas.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates.
+ */
+
+#ifndef _DISAS_H
+#define _DISAS_H
+
+struct alternative;
+struct disas_context;
+struct disassemble_info;
+
+#ifdef DISAS
+
+struct disas_context *disas_context_create(struct objtool_file *file);
+void disas_context_destroy(struct disas_context *dctx);
+void disas_warned_funcs(struct disas_context *dctx);
+void disas_funcs(struct disas_context *dctx);
+int disas_info_init(struct disassemble_info *dinfo,
+		    int arch, int mach32, int mach64,
+		    const char *options);
+size_t disas_insn(struct disas_context *dctx, struct instruction *insn);
+char *disas_result(struct disas_context *dctx);
+void disas_print_info(FILE *stream, struct instruction *insn, int depth,
+		      const char *format, ...);
+void disas_print_insn(FILE *stream, struct disas_context *dctx,
+		      struct instruction *insn, int depth,
+		      const char *format, ...);
+char *disas_alt_name(struct alternative *alt);
+const char *disas_alt_type_name(struct instruction *insn);
+
+#else /* !DISAS */
+
+#include <objtool/warn.h>
+
+static inline struct disas_context *disas_context_create(struct objtool_file *file)
+{
+	WARN("Rebuild with libopcodes for disassembly support");
+	return NULL;
+}
+
+static inline void disas_context_destroy(struct disas_context *dctx) {}
+static inline void disas_warned_funcs(struct disas_context *dctx) {}
+static inline void disas_funcs(struct disas_context *dctx) {}
+
+static inline int disas_info_init(struct disassemble_info *dinfo,
+				  int arch, int mach32, int mach64,
+				  const char *options)
+{
+	return -1;
+}
+
+static inline size_t disas_insn(struct disas_context *dctx,
+				struct instruction *insn)
+{
+	return -1;
+}
+
+static inline char *disas_result(struct disas_context *dctx)
+{
+	return NULL;
+}
+
+static inline void disas_print_info(FILE *stream, struct instruction *insn,
+				    int depth, const char *format, ...) {}
+static inline void disas_print_insn(FILE *stream, struct disas_context *dctx,
+				    struct instruction *insn, int depth,
+				    const char *format, ...) {}
+static inline char *disas_alt_name(struct alternative *alt)
+{
+	return NULL;
+}
+
+static inline const char *disas_alt_type_name(struct instruction *insn)
+{
+	return NULL;
+}
+
+#endif /* DISAS */
+
+#endif /* _DISAS_H */
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index df8434d3b744..e12c516bd320 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -8,12 +8,21 @@
 
 #include <stdio.h>
 #include <gelf.h>
+#include <linux/string.h>
 #include <linux/list.h>
 #include <linux/hashtable.h>
 #include <linux/rbtree.h>
 #include <linux/jhash.h>
+
+#include <objtool/endianness.h>
+#include <objtool/checksum_types.h>
 #include <arch/elf.h>
 
+#define SEC_NAME_LEN		1024
+#define SYM_NAME_LEN		512
+
+#define bswap_if_needed(elf, val) __bswap_if_needed(&elf->ehdr, val)
+
 #ifdef LIBELF_USE_DEPRECATED
 # define elf_getshdrnum    elf_getshnum
 # define elf_getshdrstrndx elf_getshstrndx
@@ -40,24 +49,27 @@ struct section {
 	struct section *base, *rsec;
 	struct symbol *sym;
 	Elf_Data *data;
-	char *name;
+	const char *name;
 	int idx;
 	bool _changed, text, rodata, noinstr, init, truncate;
 	struct reloc *relocs;
+	unsigned long nr_alloc_relocs;
+	struct section *twin;
 };
 
 struct symbol {
 	struct list_head list;
+	struct list_head global_list;
 	struct rb_node node;
 	struct elf_hash_node hash;
 	struct elf_hash_node name_hash;
 	GElf_Sym sym;
 	struct section *sec;
-	char *name;
+	const char *name, *demangled_name;
 	unsigned int idx, len;
 	unsigned long offset;
 	unsigned long __subtree_last;
-	struct symbol *pfunc, *cfunc, *alias;
+	struct symbol *pfunc, *cfunc, *alias, *file;
 	unsigned char bind, type;
 	u8 uaccess_safe      : 1;
 	u8 static_call_tramp : 1;
@@ -71,9 +83,17 @@ struct symbol {
 	u8 frame_pointer     : 1;
 	u8 ignore	     : 1;
 	u8 nocfi             : 1;
+	u8 cold		     : 1;
+	u8 prefix	     : 1;
+	u8 debug_checksum    : 1;
+	u8 changed	     : 1;
+	u8 included	     : 1;
+	u8 klp		     : 1;
 	struct list_head pv_target;
 	struct reloc *relocs;
 	struct section *group_sec;
+	struct checksum csum;
+	struct symbol *twin, *clone;
 };
 
 struct reloc {
@@ -88,9 +108,10 @@ struct elf {
 	GElf_Ehdr ehdr;
 	int fd;
 	bool changed;
-	char *name;
+	const char *name, *tmp_name;
 	unsigned int num_files;
 	struct list_head sections;
+	struct list_head symbols;
 	unsigned long num_relocs;
 
 	int symbol_bits;
@@ -110,14 +131,37 @@ struct elf {
 };
 
 struct elf *elf_open_read(const char *name, int flags);
+struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name);
 
 struct section *elf_create_section(struct elf *elf, const char *name,
-				   size_t entsize, unsigned int nr);
+				   size_t size, size_t entsize,
+				   unsigned int type, unsigned int align,
+				   unsigned int flags);
 struct section *elf_create_section_pair(struct elf *elf, const char *name,
 					size_t entsize, unsigned int nr,
 					unsigned int reloc_nr);
 
-struct symbol *elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size);
+struct section *elf_create_rela_section(struct elf *elf, struct section *sec,
+					unsigned int reloc_nr);
+
+struct symbol *elf_create_symbol(struct elf *elf, const char *name,
+				 struct section *sec, unsigned int bind,
+				 unsigned int type, unsigned long offset,
+				 size_t size);
+struct symbol *elf_create_section_symbol(struct elf *elf, struct section *sec);
+
+void *elf_add_data(struct elf *elf, struct section *sec, const void *data,
+		   size_t size);
+
+unsigned int elf_add_string(struct elf *elf, struct section *strtab, const char *str);
+
+struct reloc *elf_create_reloc(struct elf *elf, struct section *sec,
+			       unsigned long offset, struct symbol *sym,
+			       s64 addend, unsigned int type);
+
+struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
+			     unsigned int reloc_idx, unsigned long offset,
+			     struct symbol *sym, s64 addend, unsigned int type);
 
 struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
 				      unsigned long offset,
@@ -131,16 +175,17 @@ struct reloc *elf_init_reloc_data_sym(struct elf *elf, struct section *sec,
 				      struct symbol *sym,
 				      s64 addend);
 
-int elf_write_insn(struct elf *elf, struct section *sec,
-		   unsigned long offset, unsigned int len,
-		   const char *insn);
+int elf_write_insn(struct elf *elf, struct section *sec, unsigned long offset,
+		   unsigned int len, const char *insn);
+
 int elf_write(struct elf *elf);
-void elf_close(struct elf *elf);
+int elf_close(struct elf *elf);
 
 struct section *find_section_by_name(const struct elf *elf, const char *name);
 struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
 struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);
+struct symbol *find_global_symbol_by_name(const struct elf *elf, const char *name);
 struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset);
 int find_symbol_hole_containing(const struct section *sec, unsigned long offset);
 struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
@@ -178,11 +223,76 @@ static inline unsigned int elf_text_rela_type(struct elf *elf)
 	return elf_addr_size(elf) == 4 ? R_TEXT32 : R_TEXT64;
 }
 
+static inline bool is_undef_sym(struct symbol *sym)
+{
+	return !sym->sec->idx;
+}
+
+static inline bool is_null_sym(struct symbol *sym)
+{
+	return !sym->idx;
+}
+
+static inline bool is_sec_sym(struct symbol *sym)
+{
+	return sym->type == STT_SECTION;
+}
+
+static inline bool is_object_sym(struct symbol *sym)
+{
+	return sym->type == STT_OBJECT;
+}
+
+static inline bool is_func_sym(struct symbol *sym)
+{
+	return sym->type == STT_FUNC;
+}
+
+static inline bool is_file_sym(struct symbol *sym)
+{
+	return sym->type == STT_FILE;
+}
+
+static inline bool is_notype_sym(struct symbol *sym)
+{
+	return sym->type == STT_NOTYPE;
+}
+
+static inline bool is_global_sym(struct symbol *sym)
+{
+	return sym->bind == STB_GLOBAL;
+}
+
+static inline bool is_weak_sym(struct symbol *sym)
+{
+	return sym->bind == STB_WEAK;
+}
+
+static inline bool is_local_sym(struct symbol *sym)
+{
+	return sym->bind == STB_LOCAL;
+}
+
+static inline bool is_prefix_func(struct symbol *sym)
+{
+	return sym->prefix;
+}
+
 static inline bool is_reloc_sec(struct section *sec)
 {
 	return sec->sh.sh_type == SHT_RELA || sec->sh.sh_type == SHT_REL;
 }
 
+static inline bool is_string_sec(struct section *sec)
+{
+	return sec->sh.sh_flags & SHF_STRINGS;
+}
+
+static inline bool is_text_sec(struct section *sec)
+{
+	return sec->sh.sh_flags & SHF_EXECINSTR;
+}
+
 static inline bool sec_changed(struct section *sec)
 {
 	return sec->_changed;
@@ -223,6 +333,11 @@ static inline bool is_32bit_reloc(struct reloc *reloc)
 	return reloc->sec->sh.sh_entsize < 16;
 }
 
+static inline unsigned long sec_size(struct section *sec)
+{
+	return sec->sh.sh_size;
+}
+
 #define __get_reloc_field(reloc, field)					\
 ({									\
 	is_32bit_reloc(reloc) ?						\
@@ -300,6 +415,15 @@ static inline void set_reloc_type(struct elf *elf, struct reloc *reloc, unsigned
 	mark_sec_changed(elf, reloc->sec, true);
 }
 
+static inline unsigned int annotype(struct elf *elf, struct section *sec,
+				    struct reloc *reloc)
+{
+	unsigned int type;
+
+	type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * 8) + 4);
+	return bswap_if_needed(elf, type);
+}
+
 #define RELOC_JUMP_TABLE_BIT 1UL
 
 /* Does reloc mark the beginning of a jump table? */
@@ -325,28 +449,54 @@ static inline void set_sym_next_reloc(struct reloc *reloc, struct reloc *next)
 	reloc->_sym_next_reloc = (unsigned long)next | bit;
 }
 
-#define for_each_sec(file, sec)						\
-	list_for_each_entry(sec, &file->elf->sections, list)
+#define for_each_sec(elf, sec)						\
+	list_for_each_entry(sec, &elf->sections, list)
 
 #define sec_for_each_sym(sec, sym)					\
 	list_for_each_entry(sym, &sec->symbol_list, list)
 
-#define for_each_sym(file, sym)						\
-	for (struct section *__sec, *__fake = (struct section *)1;	\
-	     __fake; __fake = NULL)					\
-		for_each_sec(file, __sec)				\
-			sec_for_each_sym(__sec, sym)
+#define sec_prev_sym(sym)						\
+	sym->sec && sym->list.prev != &sym->sec->symbol_list ?		\
+	list_prev_entry(sym, list) : NULL
+
+#define for_each_sym(elf, sym)						\
+	list_for_each_entry(sym, &elf->symbols, global_list)
+
+#define for_each_sym_continue(elf, sym)					\
+	list_for_each_entry_continue(sym, &elf->symbols, global_list)
+
+#define rsec_next_reloc(rsec, reloc)					\
+	reloc_idx(reloc) < sec_num_entries(rsec) - 1 ? reloc + 1 : NULL
 
 #define for_each_reloc(rsec, reloc)					\
-	for (int __i = 0, __fake = 1; __fake; __fake = 0)		\
-		for (reloc = rsec->relocs;				\
-		     __i < sec_num_entries(rsec);			\
-		     __i++, reloc++)
+	for (reloc = rsec->relocs; reloc; reloc = rsec_next_reloc(rsec, reloc))
 
 #define for_each_reloc_from(rsec, reloc)				\
-	for (int __i = reloc_idx(reloc);				\
-	     __i < sec_num_entries(rsec);				\
-	     __i++, reloc++)
+	for (; reloc; reloc = rsec_next_reloc(rsec, reloc))
+
+#define for_each_reloc_continue(rsec, reloc)				\
+	for (reloc = rsec_next_reloc(rsec, reloc); reloc;		\
+	     reloc = rsec_next_reloc(rsec, reloc))
+
+#define sym_for_each_reloc(elf, sym, reloc)				\
+	for (reloc = find_reloc_by_dest_range(elf, sym->sec,		\
+					      sym->offset, sym->len);	\
+	     reloc && reloc_offset(reloc) <  sym->offset + sym->len;	\
+	     reloc = rsec_next_reloc(sym->sec->rsec, reloc))
+
+static inline struct symbol *get_func_prefix(struct symbol *func)
+{
+	struct symbol *prev;
+
+	if (!is_func_sym(func))
+		return NULL;
+
+	prev = sec_prev_sym(func);
+	if (prev && is_prefix_func(prev))
+		return prev;
+
+	return NULL;
+}
 
 #define OFFSET_STRIDE_BITS	4
 #define OFFSET_STRIDE		(1UL << OFFSET_STRIDE_BITS)
diff --git a/tools/objtool/include/objtool/endianness.h b/tools/objtool/include/objtool/endianness.h
index 4d2aa9b0fe2f..aebcd2338668 100644
--- a/tools/objtool/include/objtool/endianness.h
+++ b/tools/objtool/include/objtool/endianness.h
@@ -4,7 +4,6 @@
 
 #include <linux/kernel.h>
 #include <endian.h>
-#include <objtool/elf.h>
 
 /*
  * Does a byte swap if target file endianness doesn't match the host, i.e. cross
@@ -12,16 +11,16 @@
  * To be used for multi-byte values conversion, which are read from / about
  * to be written to a target native endianness ELF file.
  */
-static inline bool need_bswap(struct elf *elf)
+static inline bool need_bswap(GElf_Ehdr *ehdr)
 {
 	return (__BYTE_ORDER == __LITTLE_ENDIAN) ^
-	       (elf->ehdr.e_ident[EI_DATA] == ELFDATA2LSB);
+	       (ehdr->e_ident[EI_DATA] == ELFDATA2LSB);
 }
 
-#define bswap_if_needed(elf, val)					\
+#define __bswap_if_needed(ehdr, val)					\
 ({									\
 	__typeof__(val) __ret;						\
-	bool __need_bswap = need_bswap(elf);				\
+	bool __need_bswap = need_bswap(ehdr);				\
 	switch (sizeof(val)) {						\
 	case 8:								\
 		__ret = __need_bswap ? bswap_64(val) : (val); break;	\
diff --git a/tools/objtool/include/objtool/klp.h b/tools/objtool/include/objtool/klp.h
new file mode 100644
index 000000000000..ad830a7ce55b
--- /dev/null
+++ b/tools/objtool/include/objtool/klp.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _OBJTOOL_KLP_H
+#define _OBJTOOL_KLP_H
+
+#define SHF_RELA_LIVEPATCH	0x00100000
+#define SHN_LIVEPATCH		0xff20
+
+/*
+ * __klp_objects and __klp_funcs are created by klp diff and used by the patch
+ * module init code to build the klp_patch, klp_object and klp_func structs
+ * needed by the livepatch API.
+ */
+#define KLP_OBJECTS_SEC	"__klp_objects"
+#define KLP_FUNCS_SEC	"__klp_funcs"
+
+/*
+ * __klp_relocs is an intermediate section which is created by klp diff and
+ * converted into KLP symbols/relas by "objtool klp post-link".  This is needed
+ * to work around the linker, which doesn't preserve SHN_LIVEPATCH or
+ * SHF_RELA_LIVEPATCH, nor does it support having two RELA sections for a
+ * single PROGBITS section.
+ */
+#define KLP_RELOCS_SEC	"__klp_relocs"
+#define KLP_STRINGS_SEC	".rodata.klp.str1.1"
+
+struct klp_reloc {
+	void *offset;
+	void *sym;
+	u32 type;
+};
+
+int cmd_klp_diff(int argc, const char **argv);
+int cmd_klp_post_link(int argc, const char **argv);
+
+#endif /* _OBJTOOL_KLP_H */
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index c0dc86a78ff6..f7051bbe0bcb 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -28,7 +28,7 @@ struct objtool_file {
 	struct list_head mcount_loc_list;
 	struct list_head endbr_list;
 	struct list_head call_list;
-	bool ignore_unreachables, hints, rodata;
+	bool ignore_unreachables, hints, rodata, klp;
 
 	unsigned int nr_endbr;
 	unsigned int nr_endbr_int;
@@ -39,6 +39,8 @@ struct objtool_file {
 	struct pv_state *pv_ops;
 };
 
+char *top_level_dir(const char *file);
+
 struct objtool_file *objtool_open_read(const char *_objname);
 
 int objtool_pv_add(struct objtool_file *file, int idx, struct symbol *func);
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index 72d09c0adf1a..121c3761899c 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -25,7 +25,7 @@ struct special_alt {
 	struct section *new_sec;
 	unsigned long new_off;
 
-	unsigned int orig_len, new_len; /* group only */
+	unsigned int orig_len, new_len, feature; /* group only */
 };
 
 int special_get_alts(struct elf *elf, struct list_head *alts);
@@ -38,4 +38,6 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
 struct reloc *arch_find_switch_table(struct objtool_file *file,
 				     struct instruction *insn,
 				     unsigned long *table_size);
+const char *arch_cpu_feature_name(int feature_number);
+
 #endif /* _SPECIAL_H */
diff --git a/tools/objtool/include/objtool/trace.h b/tools/objtool/include/objtool/trace.h
new file mode 100644
index 000000000000..70b574366797
--- /dev/null
+++ b/tools/objtool/include/objtool/trace.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates.
+ */
+
+#ifndef _TRACE_H
+#define _TRACE_H
+
+#include <objtool/check.h>
+#include <objtool/disas.h>
+
+#ifdef DISAS
+
+extern bool trace;
+extern int trace_depth;
+
+#define TRACE(fmt, ...)						\
+({	if (trace)						\
+		fprintf(stderr, fmt, ##__VA_ARGS__);		\
+})
+
+/*
+ * Print the instruction address and a message. The instruction
+ * itself is not printed.
+ */
+#define TRACE_ADDR(insn, fmt, ...)				\
+({								\
+	if (trace) {						\
+		disas_print_info(stderr, insn, trace_depth - 1, \
+				 fmt "\n", ##__VA_ARGS__);	\
+	}							\
+})
+
+/*
+ * Print the instruction address, the instruction and a message.
+ */
+#define TRACE_INSN(insn, fmt, ...)				\
+({								\
+	if (trace) {						\
+		disas_print_insn(stderr, objtool_disas_ctx,	\
+				 insn, trace_depth - 1,		\
+				 fmt, ##__VA_ARGS__);		\
+		fprintf(stderr, "\n");				\
+		insn->trace = 1;				\
+	}							\
+})
+
+#define TRACE_INSN_STATE(insn, sprev, snext)			\
+({								\
+	if (trace)						\
+		trace_insn_state(insn, sprev, snext);		\
+})
+
+#define TRACE_ALT_FMT(pfx, fmt) pfx "<%s.%lx> " fmt
+#define TRACE_ALT_ARG(insn) disas_alt_type_name(insn), (insn)->offset
+
+#define TRACE_ALT(insn, fmt, ...)				\
+	TRACE_INSN(insn, TRACE_ALT_FMT("", fmt),		\
+		   TRACE_ALT_ARG(insn), ##__VA_ARGS__)
+
+#define TRACE_ALT_INFO(insn, pfx, fmt, ...)			\
+	TRACE_ADDR(insn, TRACE_ALT_FMT(pfx, fmt),		\
+		   TRACE_ALT_ARG(insn), ##__VA_ARGS__)
+
+#define TRACE_ALT_INFO_NOADDR(insn, pfx, fmt, ...)		\
+	TRACE_ADDR(NULL, TRACE_ALT_FMT(pfx, fmt),		\
+		   TRACE_ALT_ARG(insn), ##__VA_ARGS__)
+
+#define TRACE_ALT_BEGIN(insn, alt, alt_name)			\
+({								\
+	if (trace) {						\
+		alt_name = disas_alt_name(alt);			\
+		trace_alt_begin(insn, alt, alt_name);		\
+	}							\
+})
+
+#define TRACE_ALT_END(insn, alt, alt_name)			\
+({								\
+	if (trace) {						\
+		trace_alt_end(insn, alt, alt_name);		\
+		free(alt_name);					\
+	}							\
+})
+
+static inline void trace_enable(void)
+{
+	trace = true;
+	trace_depth = 0;
+}
+
+static inline void trace_disable(void)
+{
+	trace = false;
+}
+
+static inline void trace_depth_inc(void)
+{
+	if (trace)
+		trace_depth++;
+}
+
+static inline void trace_depth_dec(void)
+{
+	if (trace)
+		trace_depth--;
+}
+
+void trace_insn_state(struct instruction *insn, struct insn_state *sprev,
+		      struct insn_state *snext);
+void trace_alt_begin(struct instruction *orig_insn, struct alternative *alt,
+		     char *alt_name);
+void trace_alt_end(struct instruction *orig_insn, struct alternative *alt,
+		   char *alt_name);
+
+#else /* !DISAS */
+
+#define TRACE(fmt, ...) ({})
+#define TRACE_ADDR(insn, fmt, ...) ({})
+#define TRACE_INSN(insn, fmt, ...) ({})
+#define TRACE_INSN_STATE(insn, sprev, snext) ({})
+#define TRACE_ALT(insn, fmt, ...) ({})
+#define TRACE_ALT_INFO(insn, fmt, ...) ({})
+#define TRACE_ALT_INFO_NOADDR(insn, fmt, ...) ({})
+#define TRACE_ALT_BEGIN(insn, alt, alt_name) ({})
+#define TRACE_ALT_END(insn, alt, alt_name) ({})
+
+
+static inline void trace_enable(void) {}
+static inline void trace_disable(void) {}
+static inline void trace_depth_inc(void) {}
+static inline void trace_depth_dec(void) {}
+static inline void trace_alt_begin(struct instruction *orig_insn,
+				   struct alternative *alt,
+				   char *alt_name) {};
+static inline void trace_alt_end(struct instruction *orig_insn,
+				 struct alternative *alt,
+				 char *alt_name) {};
+
+#endif
+
+#endif /* _TRACE_H */
diff --git a/tools/objtool/include/objtool/util.h b/tools/objtool/include/objtool/util.h
new file mode 100644
index 000000000000..a0180b312f73
--- /dev/null
+++ b/tools/objtool/include/objtool/util.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _UTIL_H
+#define _UTIL_H
+
+#include <objtool/warn.h>
+
+#define snprintf_check(str, size, format, args...)			\
+({									\
+	int __ret = snprintf(str, size, format, args);			\
+	if (__ret < 0)							\
+		ERROR_GLIBC("snprintf");				\
+	else if (__ret >= size)						\
+		ERROR("snprintf() failed for '" format "'", args);	\
+	else								\
+		__ret = 0;						\
+	__ret;								\
+})
+
+#endif /* _UTIL_H */
diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index cb8fe846d9dd..25ff7942b4d5 100644
--- a/tools/objtool/include/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -77,9 +77,11 @@ static inline char *offstr(struct section *sec, unsigned long offset)
 #define WARN_INSN(insn, format, ...)					\
 ({									\
 	struct instruction *_insn = (insn);				\
-	if (!_insn->sym || !_insn->sym->warned)				\
+	if (!_insn->sym || !_insn->sym->warned)	{			\
 		WARN_FUNC(_insn->sec, _insn->offset, format,		\
 			  ##__VA_ARGS__);				\
+		BT_INSN(_insn, "");					\
+	}								\
 	if (_insn->sym)							\
 		_insn->sym->warned = 1;					\
 })
@@ -87,10 +89,15 @@ static inline char *offstr(struct section *sec, unsigned long offset)
 #define BT_INSN(insn, format, ...)				\
 ({								\
 	if (opts.verbose || opts.backtrace) {			\
-		struct instruction *_insn = (insn);		\
-		char *_str = offstr(_insn->sec, _insn->offset); \
-		WARN("  %s: " format, _str, ##__VA_ARGS__);	\
-		free(_str);					\
+		struct instruction *__insn = (insn);		\
+		char *_str = offstr(__insn->sec, __insn->offset); \
+		const char *_istr = objtool_disas_insn(__insn);	\
+		int _len;					\
+		_len = snprintf(NULL, 0, "  %s: " format,  _str, ##__VA_ARGS__);	\
+		_len = (_len < 50) ? 50 - _len : 0;		\
+		WARN("  %s: " format "  %*s%s", _str, ##__VA_ARGS__, _len, "", _istr); \
+		free(_str);						\
+		__insn->trace = 1;				\
 	}							\
 })
 
@@ -102,4 +109,53 @@ static inline char *offstr(struct section *sec, unsigned long offset)
 #define ERROR_FUNC(sec, offset, format, ...) __WARN_FUNC(ERROR_STR, sec, offset, format, ##__VA_ARGS__)
 #define ERROR_INSN(insn, format, ...) WARN_FUNC(insn->sec, insn->offset, format, ##__VA_ARGS__)
 
+extern bool debug;
+extern int indent;
+
+static inline void unindent(int *unused) { indent--; }
+
+/*
+ * Clang prior to 17 is being silly and considers many __cleanup() variables
+ * as unused (because they are, their sole purpose is to go out of scope).
+ *
+ * https://github.com/llvm/llvm-project/commit/877210faa447f4cc7db87812f8ed80e398fedd61
+ */
+#undef __cleanup
+#define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func)))
+
+#define __dbg(format, ...)						\
+	fprintf(stderr,							\
+		"DEBUG: %s%s" format "\n",				\
+		objname ?: "",						\
+		objname ? ": " : "",					\
+		##__VA_ARGS__)
+
+#define dbg(args...)							\
+({									\
+	if (unlikely(debug))						\
+		__dbg(args);						\
+})
+
+#define __dbg_indent(format, ...)					\
+({									\
+	if (unlikely(debug))						\
+		__dbg("%*s" format, indent * 8, "", ##__VA_ARGS__);	\
+})
+
+#define dbg_indent(args...)						\
+	int __cleanup(unindent) __dummy_##__COUNTER__;			\
+	__dbg_indent(args);						\
+	indent++
+
+#define dbg_checksum(func, insn, checksum)				\
+({									\
+	if (unlikely(insn->sym && insn->sym->pfunc &&			\
+		     insn->sym->pfunc->debug_checksum)) {		\
+		char *insn_off = offstr(insn->sec, insn->offset);	\
+		__dbg("checksum: %s %s %016lx",				\
+		      func->name, insn_off, checksum);			\
+		free(insn_off);						\
+	}								\
+})
+
 #endif /* _WARN_H */
diff --git a/tools/objtool/klp-diff.c b/tools/objtool/klp-diff.c
new file mode 100644
index 000000000000..4d1f9e9977eb
--- /dev/null
+++ b/tools/objtool/klp-diff.c
@@ -0,0 +1,1723 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#define _GNU_SOURCE /* memmem() */
+#include <subcmd/parse-options.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
+#include <objtool/arch.h>
+#include <objtool/klp.h>
+#include <objtool/util.h>
+#include <arch/special.h>
+
+#include <linux/objtool_types.h>
+#include <linux/livepatch_external.h>
+#include <linux/stringify.h>
+#include <linux/string.h>
+#include <linux/jhash.h>
+
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+
+struct elfs {
+	struct elf *orig, *patched, *out;
+	const char *modname;
+};
+
+struct export {
+	struct hlist_node hash;
+	char *mod, *sym;
+};
+
+static const char * const klp_diff_usage[] = {
+	"objtool klp diff [<options>] <in1.o> <in2.o> <out.o>",
+	NULL,
+};
+
+static const struct option klp_diff_options[] = {
+	OPT_GROUP("Options:"),
+	OPT_BOOLEAN('d', "debug", &debug, "enable debug output"),
+	OPT_END(),
+};
+
+static DEFINE_HASHTABLE(exports, 15);
+
+static inline u32 str_hash(const char *str)
+{
+	return jhash(str, strlen(str), 0);
+}
+
+static char *escape_str(const char *orig)
+{
+	size_t len = 0;
+	const char *a;
+	char *b, *new;
+
+	for (a = orig; *a; a++) {
+		switch (*a) {
+		case '\001': len += 5; break;
+		case '\n':
+		case '\t':   len += 2; break;
+		default: len++;
+		}
+	}
+
+	new = malloc(len + 1);
+	if (!new)
+		return NULL;
+
+	for (a = orig, b = new; *a; a++) {
+		switch (*a) {
+		case '\001': memcpy(b, "<SOH>", 5); b += 5; break;
+		case '\n': *b++ = '\\'; *b++ = 'n'; break;
+		case '\t': *b++ = '\\'; *b++ = 't'; break;
+		default:   *b++ = *a;
+		}
+	}
+
+	*b = '\0';
+	return new;
+}
+
+/*
+ * Load the symbol/module pairs from Module.symvers into the 'exports' hash.
+ * Fields are tab-separated; the first is skipped, then symbol, module, type.
+ * Returns 0 on success, -1 on error.
+ */
+static int read_exports(void)
+{
+	const char *symvers = "Module.symvers";
+	char line[1024], *path = NULL;
+	unsigned int line_num = 1;
+	int ret = -1;
+	FILE *file;
+
+	file = fopen(symvers, "r");
+	if (!file) {
+		path = top_level_dir(symvers);
+		if (!path) {
+			ERROR("can't open '%s', \"objtool diff\" should be run from the kernel tree", symvers);
+			return -1;
+		}
+
+		file = fopen(path, "r");
+		if (!file) {
+			ERROR_GLIBC("fopen");
+			free(path);
+			return -1;
+		}
+	}
+
+	for (; fgets(line, sizeof(line), file); line_num++) {
+		char *sym, *mod, *type;
+		struct export *export;
+
+		sym = strchr(line, '\t');
+		if (!sym) {
+			ERROR("malformed Module.symvers (sym) at line %d", line_num);
+			goto out;
+		}
+		*sym++ = '\0';
+
+		mod = strchr(sym, '\t');
+		if (!mod) {
+			ERROR("malformed Module.symvers (mod) at line %d", line_num);
+			goto out;
+		}
+		*mod++ = '\0';
+
+		type = strchr(mod, '\t');
+		if (!type) {
+			ERROR("malformed Module.symvers (type) at line %d", line_num);
+			goto out;
+		}
+		*type++ = '\0';
+
+		if (*sym == '\0' || *mod == '\0') {
+			ERROR("malformed Module.symvers at line %d", line_num);
+			goto out;
+		}
+
+		export = calloc(1, sizeof(*export));
+		if (!export) {
+			ERROR_GLIBC("calloc");
+			goto out;
+		}
+
+		export->mod = strdup(mod);
+		export->sym = strdup(sym);
+		if (!export->mod || !export->sym) {
+			ERROR_GLIBC("strdup");
+			goto out;
+		}
+
+		hash_add(exports, &export->hash, str_hash(sym));
+	}
+
+	ret = 0;
+out:
+	free(path);
+	fclose(file);
+	return ret;
+}
+
+static int read_sym_checksums(struct elf *elf)
+{
+	struct section *sec;
+
+	sec = find_section_by_name(elf, ".discard.sym_checksum");
+	if (!sec) {
+		ERROR("'%s' missing .discard.sym_checksum section, file not processed by 'objtool --checksum'?",
+		      elf->name);
+		return -1;
+	}
+
+	if (!sec->rsec) {
+		ERROR("missing reloc section for .discard.sym_checksum");
+		return -1;
+	}
+
+	if (sec_size(sec) % sizeof(struct sym_checksum)) {
+		ERROR("struct sym_checksum size mismatch");
+		return -1;
+	}
+
+	for (int i = 0; i < sec_size(sec) / sizeof(struct sym_checksum); i++) {
+		struct sym_checksum *sym_checksum;
+		struct reloc *reloc;
+		struct symbol *sym;
+
+		sym_checksum = (struct sym_checksum *)sec->data->d_buf + i;
+
+		reloc = find_reloc_by_dest(elf, sec, i * sizeof(*sym_checksum));
+		if (!reloc) {
+			ERROR("can't find reloc for sym_checksum[%d]", i);
+			return -1;
+		}
+
+		sym = reloc->sym;
+
+		if (is_sec_sym(sym)) {
+			ERROR("not sure how to handle section %s", sym->name);
+			return -1;
+		}
+
+		if (is_func_sym(sym))
+			sym->csum.checksum = sym_checksum->checksum;
+	}
+
+	return 0;
+}
+
+static struct symbol *first_file_symbol(struct elf *elf)
+{
+	struct symbol *sym;
+
+	for_each_sym(elf, sym) {
+		if (is_file_sym(sym))
+			return sym;
+	}
+
+	return NULL;
+}
+
+static struct symbol *next_file_symbol(struct elf *elf, struct symbol *sym)
+{
+	for_each_sym_continue(elf, sym) {
+		if (is_file_sym(sym))
+			return sym;
+	}
+
+	return NULL;
+}
+
+/*
+ * Certain static local variables should never be correlated.  They will be
+ * used in place rather than referencing the originals.
+ */
+static bool is_uncorrelated_static_local(struct symbol *sym)
+{
+	static const char * const vars[] = {
+		"__already_done.",
+		"__func__.",
+		"__key.",
+		"__warned.",
+		"_entry.",
+		"_entry_ptr.",
+		"_rs.",
+		"descriptor.",
+		"CSWTCH.",
+	};
+
+	if (!is_object_sym(sym) || !is_local_sym(sym))
+		return false;
+
+	if (!strcmp(sym->sec->name, ".data.once"))
+		return true;
+
+	for (int i = 0; i < ARRAY_SIZE(vars); i++) {
+		if (strstarts(sym->name, vars[i]))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Clang emits several useless .Ltmp<N> code labels.
+ */
+static bool is_clang_tmp_label(struct symbol *sym)
+{
+	return sym->type == STT_NOTYPE &&
+	       is_text_sec(sym->sec) &&
+	       strstarts(sym->name, ".Ltmp") &&
+	       isdigit((unsigned char)sym->name[5]);
+}
+
+static bool is_special_section(struct section *sec)
+{
+	static const char * const specials[] = {
+		".altinstructions",
+		".smp_locks",
+		"__bug_table",
+		"__ex_table",
+		"__jump_table",
+		"__mcount_loc",
+
+		/*
+		 * Extract .static_call_sites here to inherit non-module
+		 * preferential treatment.  The later static call processing
+		 * during klp module build will be skipped when it sees this
+		 * section already exists.
+		 */
+		".static_call_sites",
+	};
+
+	static const char * const non_special_discards[] = {
+		".discard.addressable",
+		".discard.sym_checksum",
+	};
+
+	if (is_text_sec(sec))
+		return false;
+
+	for (int i = 0; i < ARRAY_SIZE(specials); i++) {
+		if (!strcmp(sec->name, specials[i]))
+			return true;
+	}
+
+	/* Most .discard data sections are special */
+	for (int i = 0; i < ARRAY_SIZE(non_special_discards); i++) {
+		if (!strcmp(sec->name, non_special_discards[i]))
+			return false;
+	}
+
+	return strstarts(sec->name, ".discard.");
+}
+
+/*
+ * Sections which special sections point into, without being special sections
+ * in their own right.
+ */
+static bool is_special_section_aux(struct section *sec)
+{
+	static const char * const aux_names[] = {
+		".altinstr_replacement",
+		".altinstr_aux",
+	};
+	const char * const *name = aux_names;
+	const char * const *end = aux_names + ARRAY_SIZE(aux_names);
+
+	while (name < end && strcmp(sec->name, *name))
+		name++;
+
+	return name < end;
+}
+
+/*
+ * These symbols should never be correlated, so their local patched versions
+ * are used instead of linking to the originals.
+ */
+static bool dont_correlate(struct symbol *sym)
+{
+	return is_file_sym(sym) ||
+	       is_null_sym(sym) ||
+	       is_sec_sym(sym) ||
+	       is_prefix_func(sym) ||
+	       is_uncorrelated_static_local(sym) ||
+	       is_clang_tmp_label(sym) ||
+	       is_string_sec(sym->sec) ||
+	       is_special_section(sym->sec) ||
+	       is_special_section_aux(sym->sec) ||
+	       strstarts(sym->name, "__initcall__");
+}
+
+/*
+ * For each symbol in the original kernel, find its corresponding "twin" in the
+ * patched kernel.
+ */
+static int correlate_symbols(struct elfs *e)
+{
+	struct symbol *file1_sym, *file2_sym;
+	struct symbol *sym1, *sym2;
+
+	/* Correlate locals */
+	for (file1_sym = first_file_symbol(e->orig),
+	     file2_sym = first_file_symbol(e->patched); ;
+	     file1_sym = next_file_symbol(e->orig, file1_sym),
+	     file2_sym = next_file_symbol(e->patched, file2_sym)) {
+
+		if (!file1_sym && file2_sym) {
+			ERROR("FILE symbol mismatch: NULL != %s", file2_sym->name);
+			return -1;
+		}
+
+		if (file1_sym && !file2_sym) {
+			ERROR("FILE symbol mismatch: %s != NULL", file1_sym->name);
+			return -1;
+		}
+
+		if (!file1_sym)
+			break;
+
+		if (strcmp(file1_sym->name, file2_sym->name)) {
+			ERROR("FILE symbol mismatch: %s != %s", file1_sym->name, file2_sym->name);
+			return -1;
+		}
+
+		file1_sym->twin = file2_sym;
+		file2_sym->twin = file1_sym;
+
+		sym1 = file1_sym;
+
+		for_each_sym_continue(e->orig, sym1) {
+			if (is_file_sym(sym1) || !is_local_sym(sym1))
+				break;
+
+			if (dont_correlate(sym1))
+				continue;
+
+			sym2 = file2_sym;
+			for_each_sym_continue(e->patched, sym2) {
+				if (is_file_sym(sym2) || !is_local_sym(sym2))
+					break;
+
+				if (sym2->twin || dont_correlate(sym2))
+					continue;
+
+				if (strcmp(sym1->demangled_name, sym2->demangled_name))
+					continue;
+
+				sym1->twin = sym2;
+				sym2->twin = sym1;
+				break;
+			}
+		}
+	}
+
+	/* Correlate globals */
+	for_each_sym(e->orig, sym1) {
+		if (sym1->bind == STB_LOCAL)
+			continue;
+
+		sym2 = find_global_symbol_by_name(e->patched, sym1->name);
+
+		if (sym2 && !sym2->twin && !strcmp(sym1->name, sym2->name)) {
+			sym1->twin = sym2;
+			sym2->twin = sym1;
+		}
+	}
+
+	for_each_sym(e->orig, sym1) {
+		if (sym1->twin || dont_correlate(sym1))
+			continue;
+		WARN("no correlation: %s", sym1->name);
+	}
+
+	return 0;
+}
+
+/* "sympos" is used by livepatch to disambiguate duplicate symbol names */
+static unsigned long find_sympos(struct elf *elf, struct symbol *sym)
+{
+	bool vmlinux = str_ends_with(objname, "vmlinux.o");
+	unsigned long sympos = 0, nr_matches = 0;
+	bool has_dup = false;
+	struct symbol *s;
+
+	if (sym->bind != STB_LOCAL)
+		return 0;
+
+	if (vmlinux && sym->type == STT_FUNC) {
+		/*
+		 * HACK: Unfortunately, symbol ordering can differ between
+		 * vmlinux.o and vmlinux due to the linker script emitting
+		 * .text.unlikely* before .text*.  Count .text.unlikely* first.
+		 *
+		 * TODO: Disambiguate symbols more reliably (checksums?)
+		 */
+		for_each_sym(elf, s) {
+			if (strstarts(s->sec->name, ".text.unlikely") &&
+			    !strcmp(s->name, sym->name)) {
+				nr_matches++;
+				if (s == sym)
+					sympos = nr_matches;
+				else
+					has_dup = true;
+			}
+		}
+		for_each_sym(elf, s) {
+			if (!strstarts(s->sec->name, ".text.unlikely") &&
+			    !strcmp(s->name, sym->name)) {
+				nr_matches++;
+				if (s == sym)
+					sympos = nr_matches;
+				else
+					has_dup = true;
+			}
+		}
+	} else {
+		for_each_sym(elf, s) {
+			if (!strcmp(s->name, sym->name)) {
+				nr_matches++;
+				if (s == sym)
+					sympos = nr_matches;
+				else
+					has_dup = true;
+			}
+		}
+	}
+
+	if (!sympos) {
+		ERROR("can't find sympos for %s", sym->name);
+		return ULONG_MAX;
+	}
+
+	return has_dup ? sympos : 0;
+}
+
+static int clone_sym_relocs(struct elfs *e, struct symbol *patched_sym);
+
+static struct symbol *__clone_symbol(struct elf *elf, struct symbol *patched_sym,
+				     bool data_too)
+{
+	struct section *out_sec = NULL;
+	unsigned long offset = 0;
+	struct symbol *out_sym;
+
+	if (data_too && !is_undef_sym(patched_sym)) {
+		struct section *patched_sec = patched_sym->sec;
+
+		out_sec = find_section_by_name(elf, patched_sec->name);
+		if (!out_sec) {
+			out_sec = elf_create_section(elf, patched_sec->name, 0,
+						     patched_sec->sh.sh_entsize,
+						     patched_sec->sh.sh_type,
+						     patched_sec->sh.sh_addralign,
+						     patched_sec->sh.sh_flags);
+			if (!out_sec)
+				return NULL;
+		}
+
+		if (is_string_sec(patched_sym->sec)) {
+			out_sym = elf_create_section_symbol(elf, out_sec);
+			if (!out_sym)
+				return NULL;
+
+			goto sym_created;
+		}
+
+		if (!is_sec_sym(patched_sym))
+			offset = sec_size(out_sec);
+
+		if (patched_sym->len || is_sec_sym(patched_sym)) {
+			void *data = NULL;
+			size_t size;
+
+			/* bss doesn't have data */
+			if (patched_sym->sec->data->d_buf)
+				data = patched_sym->sec->data->d_buf + patched_sym->offset;
+
+			if (is_sec_sym(patched_sym))
+				size = sec_size(patched_sym->sec);
+			else
+				size = patched_sym->len;
+
+			if (!elf_add_data(elf, out_sec, data, size))
+				return NULL;
+		}
+	}
+
+	out_sym = elf_create_symbol(elf, patched_sym->name, out_sec,
+				    patched_sym->bind, patched_sym->type,
+				    offset, patched_sym->len);
+	if (!out_sym)
+		return NULL;
+
+sym_created:
+	patched_sym->clone = out_sym;
+	out_sym->clone = patched_sym;
+
+	return out_sym;
+}
+
+static const char *sym_type(struct symbol *sym)
+{
+	switch (sym->type) {
+	case STT_NOTYPE:  return "NOTYPE";
+	case STT_OBJECT:  return "OBJECT";
+	case STT_FUNC:    return "FUNC";
+	case STT_SECTION: return "SECTION";
+	case STT_FILE:    return "FILE";
+	default:	  return "UNKNOWN";
+	}
+}
+
+static const char *sym_bind(struct symbol *sym)
+{
+	switch (sym->bind) {
+	case STB_LOCAL:   return "LOCAL";
+	case STB_GLOBAL:  return "GLOBAL";
+	case STB_WEAK:    return "WEAK";
+	default:	  return "UNKNOWN";
+	}
+}
+
+/*
+ * Copy a symbol to the output object, optionally including its data and
+ * relocations.  Returns the (new or existing) clone, or NULL on error.
+ */
+static struct symbol *clone_symbol(struct elfs *e, struct symbol *patched_sym,
+				   bool data_too)
+{
+	struct symbol *pfx;
+
+	if (patched_sym->clone)
+		return patched_sym->clone;
+
+	dbg_indent("%s%s", patched_sym->name, data_too ? " [+DATA]" : "");
+
+	/* Make sure the prefix gets cloned first; give up if that fails */
+	if (is_func_sym(patched_sym) && data_too) {
+		pfx = get_func_prefix(patched_sym);
+		if (pfx && !clone_symbol(e, pfx, true))
+			return NULL;
+	}
+
+	if (!__clone_symbol(e->out, patched_sym, data_too))
+		return NULL;
+
+	if (data_too && clone_sym_relocs(e, patched_sym))
+		return NULL;
+
+	return patched_sym->clone;
+}
+
+static void mark_included_function(struct symbol *func)
+{
+	struct symbol *pfx;
+
+	func->included = 1;
+
+	/* Include prefix function */
+	pfx = get_func_prefix(func);
+	if (pfx)
+		pfx->included = 1;
+
+	/* Make sure .cold parent+child always stay together */
+	if (func->cfunc && func->cfunc != func)
+		func->cfunc->included = 1;
+	if (func->pfunc && func->pfunc != func)
+		func->pfunc->included = 1;
+}
+
+/*
+ * Mark all changed and newly added functions in the patched object as
+ * "included" so they get copied to the output object later.
+ */
+static int mark_changed_functions(struct elfs *e)
+{
+	struct symbol *sym_orig, *patched_sym;
+	bool changed = false;
+
+	/* Find changed functions */
+	for_each_sym(e->orig, sym_orig) {
+		if (!is_func_sym(sym_orig) || is_prefix_func(sym_orig))
+			continue;
+
+		patched_sym = sym_orig->twin;
+		if (!patched_sym)
+			continue;
+
+		if (sym_orig->csum.checksum != patched_sym->csum.checksum) {
+			patched_sym->changed = 1;
+			mark_included_function(patched_sym);
+			changed = true;
+		}
+	}
+
+	/* Find added functions and print them */
+	for_each_sym(e->patched, patched_sym) {
+		if (!is_func_sym(patched_sym) || is_prefix_func(patched_sym))
+			continue;
+
+		if (!patched_sym->twin) {
+			printf("%s: new function: %s\n", objname, patched_sym->name);
+			mark_included_function(patched_sym);
+			changed = true;
+		}
+	}
+
+	/* Print changed functions */
+	for_each_sym(e->patched, patched_sym) {
+		if (patched_sym->changed)
+			printf("%s: changed function: %s\n", objname, patched_sym->name);
+	}
+
+	return !changed ? -1 : 0;
+}
+
+/* Clone every "included" patched symbol, with its data, into the output */
+static int clone_included_functions(struct elfs *e)
+{
+	struct symbol *sym;
+
+	for_each_sym(e->patched, sym) {
+		/* clone_symbol() returns NULL on failure */
+		if (sym->included && !clone_symbol(e, sym, true))
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Determine whether a relocation should reference the section rather than the
+ * underlying symbol.
+ */
+static bool section_reference_needed(struct section *sec)
+{
+	/*
+	 * String symbols are zero-length and uncorrelated.  It's easier to
+	 * deal with them as section symbols.
+	 */
+	if (is_string_sec(sec))
+		return true;
+
+	/*
+	 * .rodata has mostly anonymous data so there's no way to determine the
+	 * length of a needed reference.  Just copy the whole section if needed.
+	 */
+	if (strstarts(sec->name, ".rodata"))
+		return true;
+
+	/* UBSAN anonymous data */
+	if (strstarts(sec->name, ".data..Lubsan") ||	/* GCC */
+	    strstarts(sec->name, ".data..L__unnamed_"))	/* Clang */
+		return true;
+
+	return false;
+}
+
+static bool is_reloc_allowed(struct reloc *reloc)
+{
+	return section_reference_needed(reloc->sym->sec) == is_sec_sym(reloc->sym);
+}
+
+static struct export *find_export(struct symbol *sym)
+{
+	struct export *export;
+
+	hash_for_each_possible(exports, export, hash, str_hash(sym->name)) {
+		if (!strcmp(export->sym, sym->name))
+			return export;
+	}
+
+	return NULL;
+}
+
+/*
+ * Prefer .modinfo "name="; else use the file name (no dir/suffix, '-' -> '_').
+ */
+static const char *__find_modname(struct elfs *e)
+{
+	struct section *sec;
+	char *name;
+
+	sec = find_section_by_name(e->orig, ".modinfo");
+	if (!sec) {
+		ERROR("missing .modinfo section");
+		return NULL;
+	}
+
+	name = memmem(sec->data->d_buf, sec_size(sec), "\0name=", 6);
+	if (name)
+		return name + 6;
+
+	/* Strip the directory first: a '.' in the path must not end the name */
+	name = strrchr(e->orig->name, '/');
+	name = strdup(name ? name + 1 : e->orig->name);
+	if (!name) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+
+	name[strcspn(name, ".")] = '\0';
+	for (char *c = name; *c; c++) {
+		if (*c == '-')
+			*c = '_';
+	}
+
+	return name;
+}
+
+/*
+ * Return the module name the kernel (and klp_object) uses for this object.
+ * The lookup is done once and the result is cached in e->modname; a failed
+ * lookup leaves the cache NULL, so it's retried on the next call.
+ */
+static const char *find_modname(struct elfs *e)
+{
+	if (!e->modname)
+		e->modname = __find_modname(e);
+
+	return e->modname;
+}
+
+/*
+ * Copying a function from its native compiled environment to a kernel module
+ * removes its natural access to local functions/variables and unexported
+ * globals.  References to such symbols need to be converted to KLP relocs so
+ * the kernel arch relocation code knows to apply them and where to find the
+ * symbols.  Particularly, duplicate static symbols need to be disambiguated.
+ */
+static bool klp_reloc_needed(struct reloc *patched_reloc)
+{
+	struct symbol *patched_sym = patched_reloc->sym;
+	struct export *export;
+
+	/* no external symbol to reference */
+	if (dont_correlate(patched_sym))
+		return false;
+
+	/* For included functions, a regular reloc will do. */
+	if (patched_sym->included)
+		return false;
+
+	/*
+	 * If exported by a module, it has to be a klp reloc.  Thanks to the
+	 * clusterfunk that is late module patching, the patch module is
+	 * allowed to be loaded before any modules it depends on.
+	 *
+	 * If exported by vmlinux, a normal reloc will do.
+	 */
+	export = find_export(patched_sym);
+	if (export)
+		return strcmp(export->mod, "vmlinux");
+
+	if (!patched_sym->twin) {
+		/*
+		 * Presumably the symbol and its reference were added by the
+		 * patch.  The symbol could be defined in this .o or in another
+		 * .o in the patch module.
+		 *
+		 * This check needs to be *after* the export check due to the
+		 * possibility of the patch adding a new UNDEF reference to an
+		 * exported symbol.
+		 */
+		return false;
+	}
+
+	/* Unexported symbol which lives in the original vmlinux or module. */
+	return true;
+}
+
+static int convert_reloc_sym_to_secsym(struct elf *elf, struct reloc *reloc)
+{
+	struct symbol *sym = reloc->sym;
+	struct section *sec = sym->sec;
+
+	if (!sec->sym && !elf_create_section_symbol(elf, sec))
+		return -1;
+	/* Retarget to the section symbol; the addend becomes section-relative */
+	reloc->sym = sec->sym;
+	set_reloc_sym(elf, reloc, sec->sym->idx);
+	set_reloc_addend(elf, reloc, sym->offset + reloc_addend(reloc));
+	return 0;
+}
+
+static int convert_reloc_secsym_to_sym(struct elf *elf, struct reloc *reloc)
+{
+	struct symbol *sym = reloc->sym;
+	struct section *sec = sym->sec;
+
+	/* If the symbol has a dedicated section, it's easy to find */
+	sym = find_symbol_by_offset(sec, 0);
+	if (sym && sym->len == sec_size(sec))
+		goto found_sym;
+
+	/* No dedicated section; find the symbol manually */
+	sym = find_symbol_containing(sec, arch_adjusted_addend(reloc));
+	if (!sym) {
+		/*
+		 * This can happen for special section references to weak code
+		 * whose symbol has been stripped by the linker.
+		 */
+		return -1;
+	}
+
+found_sym:
+	reloc->sym = sym;
+	set_reloc_sym(elf, reloc, sym->idx);
+	set_reloc_addend(elf, reloc, reloc_addend(reloc) - sym->offset);
+	return 0;
+}
+
+/*
+ * Convert a relocation symbol reference to the needed format: either a section
+ * symbol or the underlying symbol itself.
+ */
+static int convert_reloc_sym(struct elf *elf, struct reloc *reloc)
+{
+	if (is_reloc_allowed(reloc))
+		return 0;
+
+	if (section_reference_needed(reloc->sym->sec))
+		return convert_reloc_sym_to_secsym(elf, reloc);
+	else
+		return convert_reloc_secsym_to_sym(elf, reloc);
+}
+
+/*
+ * Convert a regular relocation to a klp relocation (sort of).
+ */
+static int clone_reloc_klp(struct elfs *e, struct reloc *patched_reloc,
+			   struct section *sec, unsigned long offset,
+			   struct export *export)
+{
+	struct symbol *patched_sym = patched_reloc->sym;
+	s64 addend = reloc_addend(patched_reloc);
+	const char *sym_modname, *sym_orig_name;
+	static struct section *klp_relocs;
+	struct symbol *sym, *klp_sym;
+	unsigned long klp_reloc_off;
+	char sym_name[SYM_NAME_LEN];
+	struct klp_reloc klp_reloc;
+	unsigned long sympos;
+
+	if (!patched_sym->twin) {
+		ERROR("unexpected klp reloc for new symbol %s", patched_sym->name);
+		return -1;
+	}
+
+	/*
+	 * Keep the original reloc intact for now to avoid breaking objtool run
+	 * which relies on proper relocations for many of its features.  This
+	 * will be disabled later by "objtool klp post-link".
+	 *
+	 * Convert it to UNDEF (and WEAK to avoid modpost warnings).
+	 */
+
+	sym = patched_sym->clone;
+	if (!sym) {
+		/* STB_WEAK: avoid modpost undefined symbol warnings */
+		sym = elf_create_symbol(e->out, patched_sym->name, NULL,
+					STB_WEAK, patched_sym->type, 0, 0);
+		if (!sym)
+			return -1;
+
+		patched_sym->clone = sym;
+		sym->clone = patched_sym;
+	}
+
+	if (!elf_create_reloc(e->out, sec, offset, sym, addend, reloc_type(patched_reloc)))
+		return -1;
+
+	/*
+	 * Create the KLP symbol.
+	 */
+
+	if (export) {
+		sym_modname = export->mod;
+		sym_orig_name = export->sym;
+		sympos = 0;
+	} else {
+		sym_modname = find_modname(e);
+		if (!sym_modname)
+			return -1;
+
+		sym_orig_name = patched_sym->twin->name;
+		sympos = find_sympos(e->orig, patched_sym->twin);
+		if (sympos == ULONG_MAX)
+			return -1;
+	}
+
+	/* symbol format: .klp.sym.modname.sym_name,sympos */
+	if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_SYM_PREFIX "%s.%s,%ld",
+		      sym_modname, sym_orig_name, sympos))
+		return -1;
+
+	klp_sym = find_symbol_by_name(e->out, sym_name);
+	if (!klp_sym) {
+		__dbg_indent("%s", sym_name);
+
+		/* STB_WEAK: avoid modpost undefined symbol warnings */
+		klp_sym = elf_create_symbol(e->out, sym_name, NULL,
+					    STB_WEAK, patched_sym->type, 0, 0);
+		if (!klp_sym)
+			return -1;
+	}
+
+	/*
+	 * Create the __klp_relocs entry.  This will be converted to an actual
+	 * KLP rela by "objtool klp post-link".
+	 *
+	 * This intermediate step is necessary to prevent corruption by the
+	 * linker, which doesn't know how to properly handle two rela sections
+	 * applying to the same base section.
+	 */
+
+	if (!klp_relocs) {
+		klp_relocs = elf_create_section(e->out, KLP_RELOCS_SEC, 0,
+						0, SHT_PROGBITS, 8, SHF_ALLOC);
+		if (!klp_relocs)
+			return -1;
+	}
+
+	klp_reloc_off = sec_size(klp_relocs);
+	memset(&klp_reloc, 0, sizeof(klp_reloc));
+
+	klp_reloc.type = reloc_type(patched_reloc);
+	if (!elf_add_data(e->out, klp_relocs, &klp_reloc, sizeof(klp_reloc)))
+		return -1;
+
+	/* klp_reloc.offset */
+	if (!sec->sym && !elf_create_section_symbol(e->out, sec))
+		return -1;
+
+	if (!elf_create_reloc(e->out, klp_relocs,
+			      klp_reloc_off + offsetof(struct klp_reloc, offset),
+			      sec->sym, offset, R_ABS64))
+		return -1;
+
+	/* klp_reloc.sym */
+	if (!elf_create_reloc(e->out, klp_relocs,
+			      klp_reloc_off + offsetof(struct klp_reloc, sym),
+			      klp_sym, addend, R_ABS64))
+		return -1;
+
+	return 0;
+}
+
+#define dbg_clone_reloc(sec, offset, patched_sym, addend, export, klp)			\
+	dbg_indent("%s+0x%lx: %s%s0x%lx [%s%s%s%s%s%s]",				\
+		   sec->name, offset, patched_sym->name,				\
+		   addend >= 0 ? "+" : "-", labs(addend),				\
+		   sym_type(patched_sym),						\
+		   patched_sym->type == STT_SECTION ? "" : " ",				\
+		   patched_sym->type == STT_SECTION ? "" : sym_bind(patched_sym),	\
+		   is_undef_sym(patched_sym) ? " UNDEF" : "",				\
+		   export ? " EXPORTED" : "",						\
+		   klp ? " KLP" : "")
+
+/*
+ * Copy a reloc and its symbol to the output object.  Returns 0 or -1.
+ */
+static int clone_reloc(struct elfs *e, struct reloc *patched_reloc,
+			struct section *sec, unsigned long offset)
+{
+	struct symbol *patched_sym = patched_reloc->sym;
+	struct export *export = find_export(patched_sym);
+	long addend = reloc_addend(patched_reloc);
+	struct symbol *out_sym;
+	bool klp;
+
+	if (!is_reloc_allowed(patched_reloc)) {
+		ERROR_FUNC(patched_reloc->sec->base, reloc_offset(patched_reloc),
+			   "missing symbol for reference to %s+%ld",
+			   patched_sym->name, addend);
+		return -1;
+	}
+
+	klp = klp_reloc_needed(patched_reloc);
+
+	dbg_clone_reloc(sec, offset, patched_sym, addend, export, klp);
+
+	if (klp)
+		return clone_reloc_klp(e, patched_reloc, sec, offset, export) ? -1 : 0;
+
+	/*
+	 * Why !export sets 'data_too':
+	 *
+	 * Unexported non-klp symbols need to live in the patch module,
+	 * otherwise there will be unresolved symbols.  Notably, this includes:
+	 *
+	 *   - New functions/data
+	 *   - String sections
+	 *   - Special section entries
+	 *   - Uncorrelated static local variables
+	 *   - UBSAN sections
+	 */
+	out_sym = clone_symbol(e, patched_sym, patched_sym->included || !export);
+	if (!out_sym)
+		return -1;
+
+	/*
+	 * For strings, all references use section symbols, thanks to
+	 * section_reference_needed().  clone_symbol() has cloned an empty
+	 * version of the string section.  Now copy the string itself.
+	 */
+	if (is_string_sec(patched_sym->sec)) {
+		const char *str = patched_sym->sec->data->d_buf + addend;
+		char *esc = debug ? escape_str(str) : NULL;
+
+		__dbg_indent("\"%s\"", esc ?: str);
+		free(esc);
+
+		addend = elf_add_string(e->out, out_sym->sec, str);
+		if (addend == -1)
+			return -1;
+	}
+
+	if (!elf_create_reloc(e->out, sec, offset, out_sym, addend,
+			      reloc_type(patched_reloc)))
+		return -1;
+
+	return 0;
+}
+
+/* Copy all relocs needed for a symbol's contents */
+static int clone_sym_relocs(struct elfs *e, struct symbol *patched_sym)
+{
+	struct section *patched_rsec = patched_sym->sec->rsec;
+	struct reloc *patched_reloc;
+	unsigned long start, end;
+	struct symbol *out_sym;
+
+	out_sym = patched_sym->clone;
+	if (!out_sym) {
+		ERROR("no clone for %s", patched_sym->name);
+		return -1;
+	}
+
+	if (!patched_rsec)
+		return 0;
+
+	if (!is_sec_sym(patched_sym) && !patched_sym->len)
+		return 0;
+
+	if (is_string_sec(patched_sym->sec))
+		return 0;
+
+	if (is_sec_sym(patched_sym)) {
+		start = 0;
+		end = sec_size(patched_sym->sec);
+	} else {
+		start = patched_sym->offset;
+		end = start + patched_sym->len;
+	}
+
+	for_each_reloc(patched_rsec, patched_reloc) {
+		unsigned long offset;
+
+		if (reloc_offset(patched_reloc) < start ||
+		    reloc_offset(patched_reloc) >= end)
+			continue;
+
+		/*
+		 * Skip any reloc referencing .altinstr_aux.  Its code is
+		 * always patched by alternatives.  See ALTERNATIVE_TERNARY().
+		 */
+		if (patched_reloc->sym->sec &&
+		    !strcmp(patched_reloc->sym->sec->name, ".altinstr_aux"))
+			continue;
+
+		if (convert_reloc_sym(e->patched, patched_reloc)) {
+			ERROR_FUNC(patched_rsec->base, reloc_offset(patched_reloc),
+				   "failed to convert reloc sym '%s' to its proper format",
+				   patched_reloc->sym->name);
+			return -1;
+		}
+
+		offset = out_sym->offset + (reloc_offset(patched_reloc) - patched_sym->offset);
+
+		if (clone_reloc(e, patched_reloc, out_sym->sec, offset))
+			return -1;
+	}
+	return 0;
+
+}
+
+static int create_fake_symbol(struct elf *elf, struct section *sec,
+			      unsigned long offset, size_t size)
+{
+	char name[SYM_NAME_LEN];
+	unsigned int type;
+	static int ctr;
+	char *c;
+
+	if (snprintf_check(name, SYM_NAME_LEN, "%s_%d", sec->name, ctr++))
+		return -1;
+
+	for (c = name; *c; c++)
+		if (*c == '.')
+			*c = '_';
+
+	/*
+	 * STT_NOTYPE: Prevent objtool from validating .altinstr_replacement
+	 *	       while still allowing objdump to disassemble it.
+	 */
+	type = is_text_sec(sec) ? STT_NOTYPE : STT_OBJECT;
+	return elf_create_symbol(elf, name, sec, STB_LOCAL, type, offset, size) ? 0 : -1;
+}
+
+/*
+ * Special sections (alternatives, etc) are basically arrays of structs.
+ * For all the special sections, create a symbol for each struct entry.  This
+ * is a bit cumbersome, but it makes the extracting of the individual entries
+ * much more straightforward.
+ *
+ * There are three ways to identify the entry sizes for a special section:
+ *
+ * 1) ELF section header sh_entsize: Ideally this would be used almost
+ *    everywhere.  But unfortunately the toolchains make it difficult.  The
+ *    assembler .[push]section directive syntax only takes entsize when
+ *    combined with SHF_MERGE.  But Clang disallows combining SHF_MERGE with
+ *    SHF_WRITE.  And some special sections do need to be writable.
+ *
+ *    Another place this wouldn't work is .altinstr_replacement, whose entries
+ *    don't have a fixed size.
+ *
+ * 2) ANNOTATE_DATA_SPECIAL: This is a lightweight objtool annotation which
+ *    points to the beginning of each entry.  The size of the entry is then
+ *    inferred by the location of the subsequent annotation (or end of
+ *    section).
+ *
+ * 3) Simple array of pointers: If the special section is just a basic array of
+ *    pointers, the entry size can be inferred by the number of relocations.
+ *    No annotations needed.
+ *
+ * Note I also tried to create per-entry symbols at the time of creation, in
+ * the original [inline] asm.  Unfortunately, creating uniquely named symbols
+ * is trickier than one might think, especially with Clang inline asm.  I
+ * eventually just gave up trying to make that work, in favor of using
+ * ANNOTATE_DATA_SPECIAL and creating the symbols here after the fact.
+ */
+static int create_fake_symbols(struct elf *elf)
+{
+	struct section *sec;
+	struct reloc *reloc;
+
+	/*
+	 * 1) Make symbols for all the ANNOTATE_DATA_SPECIAL entries.
+	 *    NOTE(review): the return below also skips 2) -- intended?
+	 */
+	sec = find_section_by_name(elf, ".discard.annotate_data");
+	if (!sec || !sec->rsec)
+		return 0;
+
+	for_each_reloc(sec->rsec, reloc) {
+		unsigned long offset, size;
+		struct reloc *next_reloc;
+
+		if (annotype(elf, sec, reloc) != ANNOTYPE_DATA_SPECIAL)
+			continue;
+
+		offset = reloc_addend(reloc);
+
+		size = 0;
+		next_reloc = reloc;
+		for_each_reloc_continue(sec->rsec, next_reloc) {
+			if (annotype(elf, sec, next_reloc) != ANNOTYPE_DATA_SPECIAL ||
+			    next_reloc->sym->sec != reloc->sym->sec)
+				continue;
+
+			size = reloc_addend(next_reloc) - offset;
+			break;
+		}
+
+		if (!size)
+			size = sec_size(reloc->sym->sec) - offset;
+
+		if (create_fake_symbol(elf, reloc->sym->sec, offset, size))
+			return -1;
+	}
+
+	/*
+	 * 2) Make symbols for sh_entsize, and simple arrays of pointers:
+	 */
+
+	for_each_sec(elf, sec) {
+		unsigned int entry_size;
+		unsigned long offset;
+
+		if (!is_special_section(sec) || find_symbol_by_offset(sec, 0))
+			continue;
+
+		if (!sec->rsec) {
+			ERROR("%s: missing special section relocations", sec->name);
+			return -1;
+		}
+
+		entry_size = sec->sh.sh_entsize;
+		if (!entry_size) {
+			entry_size = arch_reloc_size(sec->rsec->relocs);
+			if (sec_size(sec) != entry_size * sec_num_entries(sec->rsec)) {
+				ERROR("%s: missing special section entsize or annotations", sec->name);
+				return -1;
+			}
+		}
+
+		for (offset = 0; offset < sec_size(sec); offset += entry_size) {
+			if (create_fake_symbol(elf, sec, offset, entry_size))
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* A special section entry is kept if one of its relocs hits an included function */
+static bool should_keep_special_sym(struct elf *elf, struct symbol *sym)
+{
+	struct reloc *reloc;
+
+	if (is_sec_sym(sym))
+		return false;
+	if (!sym->sec->rsec)
+		return false;
+
+	sym_for_each_reloc(elf, sym, reloc) {
+		bool skip = convert_reloc_sym(elf, reloc);
+
+		if (!skip && is_func_sym(reloc->sym) && reloc->sym->included)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Klp relocations aren't allowed for __jump_table and .static_call_sites if
+ * the referenced symbol lives in a kernel module, because such klp relocs may
+ * be applied after static branch/call init, resulting in code corruption.
+ *
+ * Validate a special section entry to avoid that.  Note that an inert
+ * tracepoint is harmless enough, in that case just skip the entry and print a
+ * warning.  Otherwise, return an error.
+ *
+ * This is only a temporary limitation which will be fixed when livepatch adds
+ * support for submodules: fully self-contained modules which are embedded in
+ * the top-level livepatch module's data and which can be loaded on demand when
+ * their corresponding to-be-patched module gets loaded.  Then klp relocs can
+ * be retired.
+ *
+ * Return:
+ *   -1: error: validation failed
+ *    1: warning: tracepoint skipped
+ *    0: success
+ */
+static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym)
+{
+	bool static_branch = !strcmp(sym->sec->name, "__jump_table");
+	bool static_call   = !strcmp(sym->sec->name, ".static_call_sites");
+	const char *code_name = "<unknown>";	/* used if no code reloc was seen */
+	struct symbol *code_sym = NULL;
+	unsigned long code_offset = 0;
+	struct reloc *reloc;
+	int ret = 0;
+
+	if (!static_branch && !static_call)
+		return 0;
+
+	sym_for_each_reloc(e->patched, sym, reloc) {
+		const char *sym_modname;
+		struct export *export;
+
+		/* Static branch/call keys are always STT_OBJECT */
+		if (reloc->sym->type != STT_OBJECT) {
+			/* Save code location which can be printed below */
+			if (reloc->sym->type == STT_FUNC && !code_sym) {
+				code_sym = reloc->sym;
+				code_name = code_sym->name;
+				code_offset = reloc_addend(reloc);
+			}
+			continue;
+		}
+
+		if (!klp_reloc_needed(reloc))
+			continue;
+
+		export = find_export(reloc->sym);
+		if (export) {
+			sym_modname = export->mod;
+		} else {
+			sym_modname = find_modname(e);
+			if (!sym_modname)
+				return -1;
+		}
+
+		/* vmlinux keys are ok */
+		if (!strcmp(sym_modname, "vmlinux"))
+			continue;
+
+		if (static_branch) {
+			if (strstarts(reloc->sym->name, "__tracepoint_")) {
+				WARN("%s: disabling unsupported tracepoint %s",
+				     code_name, reloc->sym->name + 13);
+				ret = 1;
+				continue;
+			}
+
+			ERROR("%s+0x%lx: unsupported static branch key %s.  Use static_key_enabled() instead",
+			      code_name, code_offset, reloc->sym->name);
+			return -1;
+		}
+
+		/* static call */
+		if (strstarts(reloc->sym->name, "__SCK__tp_func_")) {
+			ret = 1;
+			continue;
+		}
+
+		ERROR("%s()+0x%lx: unsupported static call key %s.  Use KLP_STATIC_CALL() instead",
+		      code_name, code_offset, reloc->sym->name);
+		return -1;
+	}
+
+	return ret;
+}
+
+static int clone_special_section(struct elfs *e, struct section *patched_sec)
+{
+	struct symbol *entry;
+
+	/*
+	 * Walk the section's object symbols and clone each one (along with its
+	 * dependencies) that refers to an included function.
+	 */
+	sec_for_each_sym(patched_sec, entry) {
+		int status;
+
+		if (!is_object_sym(entry) ||
+		    !should_keep_special_sym(e->patched, entry))
+			continue;
+
+		status = validate_special_section_klp_reloc(e, entry);
+		if (status < 0)
+			return -1;
+
+		/* A positive status means the entry was deliberately skipped */
+		if (status > 0)
+			continue;
+
+		if (!clone_symbol(e, entry, true))
+			return -1;
+	}
+
+	return 0;
+}
+
+/* Pull just the required entries out of the special sections */
+static int clone_special_sections(struct elfs *e)
+{
+	struct section *sec;
+
+	if (create_fake_symbols(e->patched))
+		return -1;
+
+	for_each_sec(e->patched, sec) {
+		if (!is_special_section(sec))
+			continue;
+		if (clone_special_section(e, sec))
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Hook up one livepatch callback, if the patch defines it.
+ *
+ * Looks up the callback pointer symbol named "<prefix><modname>" (created by
+ * KLP_PRE_PATCH_CALLBACK() and friends) in the output file.  If it exists,
+ * the reloc found at the symbol's location is duplicated into the
+ * klp_object_ext callbacks field at @cb_offset (an offset within struct
+ * klp_callbacks), so both refer to the same target.
+ *
+ * A missing symbol isn't an error; the caller tries all four callback types.
+ *
+ * Return: 0 on success or if the callback isn't defined, -1 on error.
+ */
+static int add_klp_callback(struct elfs *e, struct section *obj_sec,
+			    const char *modname, const char *prefix,
+			    unsigned long cb_offset)
+{
+	char sym_name[SYM_NAME_LEN];
+	struct reloc *reloc;
+	struct symbol *sym;
+
+	if (snprintf_check(sym_name, SYM_NAME_LEN, "%s%s", prefix, modname))
+		return -1;
+
+	sym = find_symbol_by_name(e->out, sym_name);
+	if (!sym)
+		return 0;
+
+	/* Don't dereference a missing reloc: report the malformed pointer */
+	reloc = find_reloc_by_dest(e->out, sym->sec, sym->offset);
+	if (!reloc) {
+		ERROR("%s: missing callback relocation", sym_name);
+		return -1;
+	}
+
+	if (!elf_create_reloc(e->out, obj_sec,
+			      offsetof(struct klp_object_ext, callbacks) + cb_offset,
+			      reloc->sym, reloc_addend(reloc), R_ABS64))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Create __klp_objects and __klp_funcs sections which are intermediate
+ * sections provided as input to the patch module's init code for building the
+ * klp_patch, klp_object and klp_func structs for the livepatch API.
+ *
+ * A single klp_object_ext is emitted, plus one klp_func_ext for every
+ * function symbol in the output file which isn't cold and whose clone is
+ * marked changed.  Strings they refer to go in a separate strings section.
+ *
+ * Return: 0 on success, -1 on error.
+ */
+static int create_klp_sections(struct elfs *e)
+{
+	size_t obj_size  = sizeof(struct klp_object_ext);
+	size_t func_size = sizeof(struct klp_func_ext);
+	struct section *obj_sec, *funcs_sec, *str_sec;
+	struct symbol *funcs_sym, *str_sym, *sym;
+	unsigned int nr_funcs = 0;
+	const char *modname;
+	void *obj_data;
+	s64 addend;
+
+	obj_sec  = elf_create_section_pair(e->out, KLP_OBJECTS_SEC, obj_size, 0, 0);
+	if (!obj_sec)
+		return -1;
+
+	funcs_sec = elf_create_section_pair(e->out, KLP_FUNCS_SEC, func_size, 0, 0);
+	if (!funcs_sec)
+		return -1;
+
+	funcs_sym = elf_create_section_symbol(e->out, funcs_sec);
+	if (!funcs_sym)
+		return -1;
+
+	str_sec = elf_create_section(e->out, KLP_STRINGS_SEC, 0, 0,
+				     SHT_PROGBITS, 1,
+				     SHF_ALLOC | SHF_STRINGS | SHF_MERGE);
+	if (!str_sec)
+		return -1;
+
+	if (elf_add_string(e->out, str_sec, "") == -1)
+		return -1;
+
+	str_sym = elf_create_section_symbol(e->out, str_sec);
+	if (!str_sym)
+		return -1;
+
+	/* allocate klp_object_ext */
+	obj_data = elf_add_data(e->out, obj_sec, NULL, obj_size);
+	if (!obj_data)
+		return -1;
+
+	modname = find_modname(e);
+	if (!modname)
+		return -1;
+
+	/* klp_object_ext.name */
+	if (strcmp(modname, "vmlinux")) {
+		addend = elf_add_string(e->out, str_sec, modname);
+		if (addend == -1)
+			return -1;
+
+		if (!elf_create_reloc(e->out, obj_sec,
+				      offsetof(struct klp_object_ext, name),
+				      str_sym, addend, R_ABS64))
+			return -1;
+	}
+
+	/* klp_object_ext.funcs */
+	if (!elf_create_reloc(e->out, obj_sec, offsetof(struct klp_object_ext, funcs),
+			      funcs_sym, 0, R_ABS64))
+		return -1;
+
+	for_each_sym(e->out, sym) {
+		unsigned long offset = nr_funcs * func_size;
+		unsigned long sympos;
+		void *func_data;
+
+		if (!is_func_sym(sym) || sym->cold || !sym->clone || !sym->clone->changed)
+			continue;
+
+		/* allocate klp_func_ext */
+		func_data = elf_add_data(e->out, funcs_sec, NULL, func_size);
+		if (!func_data)
+			return -1;
+
+		/* klp_func_ext.old_name */
+		addend = elf_add_string(e->out, str_sec, sym->clone->twin->name);
+		if (addend == -1)
+			return -1;
+
+		if (!elf_create_reloc(e->out, funcs_sec,
+				      offset + offsetof(struct klp_func_ext, old_name),
+				      str_sym, addend, R_ABS64))
+			return -1;
+
+		/* klp_func_ext.new_func */
+		if (!elf_create_reloc(e->out, funcs_sec,
+				      offset + offsetof(struct klp_func_ext, new_func),
+				      sym, 0, R_ABS64))
+			return -1;
+
+		/* klp_func_ext.sympos */
+		BUILD_BUG_ON(sizeof(sympos) != sizeof_field(struct klp_func_ext, sympos));
+		sympos = find_sympos(e->orig, sym->clone->twin);
+		if (sympos == ULONG_MAX)
+			return -1;
+		memcpy(func_data + offsetof(struct klp_func_ext, sympos), &sympos,
+		       sizeof_field(struct klp_func_ext, sympos));
+
+		nr_funcs++;
+	}
+
+	/* klp_object_ext.nr_funcs */
+	BUILD_BUG_ON(sizeof(nr_funcs) != sizeof_field(struct klp_object_ext, nr_funcs));
+	memcpy(obj_data + offsetof(struct klp_object_ext, nr_funcs), &nr_funcs,
+	       sizeof_field(struct klp_object_ext, nr_funcs));
+
+	/*
+	 * Find callback pointers created by KLP_PRE_PATCH_CALLBACK() and
+	 * friends, and add them to the klp object.
+	 */
+
+	if (add_klp_callback(e, obj_sec, modname, KLP_PRE_PATCH_PREFIX,
+			     offsetof(struct klp_callbacks, pre_patch)))
+		return -1;
+
+	if (add_klp_callback(e, obj_sec, modname, KLP_POST_PATCH_PREFIX,
+			     offsetof(struct klp_callbacks, post_patch)))
+		return -1;
+
+	if (add_klp_callback(e, obj_sec, modname, KLP_PRE_UNPATCH_PREFIX,
+			     offsetof(struct klp_callbacks, pre_unpatch)))
+		return -1;
+
+	if (add_klp_callback(e, obj_sec, modname, KLP_POST_UNPATCH_PREFIX,
+			     offsetof(struct klp_callbacks, post_unpatch)))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Carry every .modinfo "import_ns=" tag over to the output file so that all
+ * namespaced exported symbols remain reachable through normal relocs.
+ */
+static int copy_import_ns(struct elfs *e)
+{
+	struct section *src_sec, *dst_sec = NULL;
+	char *tag, *end;
+
+	src_sec = find_section_by_name(e->patched, ".modinfo");
+	if (!src_sec)
+		return 0;
+
+	tag = src_sec->data->d_buf;
+	if (!tag)
+		return 0;
+
+	end = tag + sec_size(src_sec);
+	while (tag < end) {
+		size_t len;
+
+		tag = memmem(tag, end - tag, "import_ns=", 10);
+		if (!tag)
+			break;
+		len = strlen(tag) + 1;
+
+		if (!dst_sec)
+			dst_sec = find_section_by_name(e->out, ".modinfo");
+		if (!dst_sec) {
+			dst_sec = elf_create_section(e->out, ".modinfo", 0,
+						     src_sec->sh.sh_entsize,
+						     src_sec->sh.sh_type,
+						     src_sec->sh.sh_addralign,
+						     src_sec->sh.sh_flags);
+			if (!dst_sec)
+				return -1;
+		}
+		if (!elf_add_data(e->out, dst_sec, tag, len))
+			return -1;
+		tag += len;
+	}
+
+	return 0;
+}
+
+int cmd_klp_diff(int argc, const char **argv)
+{
+	struct elfs e = {0};
+
+	argc = parse_options(argc, argv, klp_diff_options, klp_diff_usage, 0);
+	if (argc != 3)
+		usage_with_options(klp_diff_usage, klp_diff_options);
+
+	objname = argv[0];
+	/* argv[0]: original object, argv[1]: patched object, argv[2]: output file */
+	e.orig = elf_open_read(argv[0], O_RDONLY);
+	e.patched = elf_open_read(argv[1], O_RDONLY);
+	e.out = NULL;
+
+	if (!e.orig || !e.patched)
+		return -1;
+
+	if (read_exports())
+		return -1;
+
+	if (read_sym_checksums(e.orig))
+		return -1;
+
+	if (read_sym_checksums(e.patched))
+		return -1;
+
+	if (correlate_symbols(&e))
+		return -1;
+	/* NOTE(review): nonzero exits successfully with no output written -- presumably "nothing changed"; confirm */
+	if (mark_changed_functions(&e))
+		return 0;
+	/* The output file is created from the original object's ELF header */
+	e.out = elf_create_file(&e.orig->ehdr, argv[2]);
+	if (!e.out)
+		return -1;
+
+	if (clone_included_functions(&e))
+		return -1;
+
+	if (clone_special_sections(&e))
+		return -1;
+
+	if (create_klp_sections(&e))
+		return -1;
+
+	if (copy_import_ns(&e))
+		return -1;
+
+	if  (elf_write(e.out))
+		return -1;
+
+	return elf_close(e.out);
+}
diff --git a/tools/objtool/klp-post-link.c b/tools/objtool/klp-post-link.c
new file mode 100644
index 000000000000..c013e39957b1
--- /dev/null
+++ b/tools/objtool/klp-post-link.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Read the intermediate KLP reloc/symbol representations created by klp diff
+ * and convert them to the proper format required by livepatch.  This needs to
+ * run last to avoid linker wreckage.  Linkers don't tend to handle the "two
+ * rela sections for a single base section" case very well, nor do they like
+ * SHN_LIVEPATCH.
+ *
+ * This is the final tool in the livepatch module generation pipeline:
+ *
+ *   kernel builds -> objtool klp diff -> module link -> objtool klp post-link
+ */
+
+#include <fcntl.h>
+#include <gelf.h>
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
+#include <objtool/klp.h>
+#include <objtool/util.h>
+#include <linux/livepatch_external.h>
+
+/* Turn every KLP_RELOCS_SEC entry into a real KLP rela.  Returns 0, or -1 on error. */
+static int fix_klp_relocs(struct elf *elf)
+{
+	struct section *symtab, *klp_relocs;
+
+	klp_relocs = find_section_by_name(elf, KLP_RELOCS_SEC);
+	if (!klp_relocs)
+		return 0;
+
+	symtab = find_section_by_name(elf, ".symtab");
+	if (!symtab) {
+		ERROR("missing .symtab");
+		return -1;
+	}
+
+	for (int i = 0; i < sec_size(klp_relocs) / sizeof(struct klp_reloc); i++) {
+		struct klp_reloc *klp_reloc;
+		unsigned long klp_reloc_off;
+		struct section *sec, *tmp, *klp_rsec;
+		unsigned long offset;
+		struct reloc *reloc;
+		char sym_modname[64];
+		char rsec_name[SEC_NAME_LEN];
+		u64 addend;
+		struct symbol *sym, *klp_sym;
+
+		klp_reloc_off = i * sizeof(*klp_reloc);
+		klp_reloc = klp_relocs->data->d_buf + klp_reloc_off;
+
+		/* Read __klp_relocs[i]: */
+
+		/* klp_reloc.sec_offset */
+		reloc = find_reloc_by_dest(elf, klp_relocs,
+					   klp_reloc_off + offsetof(struct klp_reloc, offset));
+		if (!reloc) {
+			ERROR("malformed " KLP_RELOCS_SEC " section");
+			return -1;
+		}
+
+		sec = reloc->sym->sec;
+		offset = reloc_addend(reloc);
+
+		/* klp_reloc.sym */
+		reloc = find_reloc_by_dest(elf, klp_relocs,
+					   klp_reloc_off + offsetof(struct klp_reloc, sym));
+		if (!reloc) {
+			ERROR("malformed " KLP_RELOCS_SEC " section");
+			return -1;
+		}
+
+		klp_sym = reloc->sym;
+		addend = reloc_addend(reloc);
+
+		/* symbol format: .klp.sym.modname.sym_name,sympos */
+		if (sscanf(klp_sym->name + strlen(KLP_SYM_PREFIX), "%55[^.]", sym_modname) != 1) {
+			ERROR("can't find modname in klp symbol '%s'", klp_sym->name);
+			return -1;	/* sym_modname is unset, don't use it */
+		}
+
+		/* Create the KLP rela: */
+
+		/* section format: .klp.rela.sec_objname.section_name */
+		if (snprintf_check(rsec_name, SEC_NAME_LEN,
+				   KLP_RELOC_SEC_PREFIX "%s.%s",
+				   sym_modname, sec->name))
+			return -1;
+
+		klp_rsec = find_section_by_name(elf, rsec_name);
+		if (!klp_rsec) {
+			klp_rsec = elf_create_section(elf, rsec_name, 0,
+						      elf_rela_size(elf),
+						      SHT_RELA, elf_addr_size(elf),
+						      SHF_ALLOC | SHF_INFO_LINK | SHF_RELA_LIVEPATCH);
+			if (!klp_rsec)
+				return -1;
+
+			klp_rsec->sh.sh_link = symtab->idx;
+			klp_rsec->sh.sh_info = sec->idx;
+			klp_rsec->base = sec;
+		}
+
+		/* Swap in the KLP rela section while creating the reloc */
+		tmp = sec->rsec;
+		sec->rsec = klp_rsec;
+		if (!elf_create_reloc(elf, sec, offset, klp_sym, addend, klp_reloc->type))
+			return -1;
+		sec->rsec = tmp;
+
+		/* Fix up the corresponding KLP symbol: */
+
+		klp_sym->sym.st_shndx = SHN_LIVEPATCH;
+		if (!gelf_update_sym(symtab->data, klp_sym->idx, &klp_sym->sym)) {
+			ERROR_ELF("gelf_update_sym");
+			return -1;
+		}
+
+		/* Disable the original non-KLP reloc by converting it to R_*_NONE: */
+		reloc = find_reloc_by_dest(elf, sec, offset);
+		if (!reloc) {
+			ERROR("%s+0x%lx: can't find original reloc", sec->name, offset);
+			return -1;
+		}
+
+		sym = reloc->sym;
+		sym->sym.st_shndx = SHN_LIVEPATCH;
+		set_reloc_type(elf, reloc, 0);
+		if (!gelf_update_sym(symtab->data, sym->idx, &sym->sym)) {
+			ERROR_ELF("gelf_update_sym");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * This runs on the livepatch module after all other linking has been done.  It
+ * converts the intermediate __klp_relocs section into proper KLP relocs to be
+ * processed by livepatch.  This needs to run last to avoid linker wreckage.
+ * Linkers don't tend to handle the "two rela sections for a single base
+ * section" case very well, nor do they appreciate SHN_LIVEPATCH.  The module
+ * file given as the only argument is rewritten in place.
+ */
+int cmd_klp_post_link(int argc, const char **argv)
+{
+	struct elf *elf;
+
+	argc--;
+	argv++;
+
+	if (argc != 1) {
+		fprintf(stderr, "usage: objtool klp post-link <file.ko>\n");
+		return -1;
+	}
+
+	elf = elf_open_read(argv[0], O_RDWR);
+	if (!elf)
+		return -1;
+
+	if (fix_klp_relocs(elf))
+		return -1;
+
+	if (elf_write(elf))
+		return -1;
+
+	return elf_close(elf);
+}
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index 802895fae3ca..14f8ab653449 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -36,6 +36,7 @@ NORETURN(machine_real_restart)
 NORETURN(make_task_dead)
 NORETURN(mpt_halt_firmware)
 NORETURN(mwait_play_dead)
+NORETURN(native_play_dead)
 NORETURN(nmi_panic_self_stop)
 NORETURN(panic)
 NORETURN(vpanic)
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 5c8b974ad0f9..3c26ed561c7e 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -16,7 +16,8 @@
 #include <objtool/objtool.h>
 #include <objtool/warn.h>
 
-bool help;
+bool debug;
+int indent;
 
 static struct objtool_file file;
 
@@ -71,6 +72,39 @@ int objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
 	return 0;
 }
 
+/*
+ * Return a malloc()ed "<dir>/<file>" string, where <dir> is the path of the
+ * running executable with its last three components removed.  NULL on error.
+ */
+char *top_level_dir(const char *file)
+{
+	char exe[PATH_MAX], *path;
+	size_t dir_len;
+	ssize_t n;
+	int strip;
+
+	n = readlink("/proc/self/exe", exe, sizeof(exe) - 1);
+	if (n <= 0)
+		return NULL;
+	exe[n] = '\0';
+
+	for (strip = 3; strip > 0; strip--) {
+		char *slash = strrchr(exe, '/');
+		if (!slash)
+			return NULL;
+		*slash = '\0';
+	}
+
+	dir_len = strlen(exe);
+	path = malloc(dir_len + strlen(file) + 2);
+	if (!path)
+		return NULL;
+	memcpy(path, exe, dir_len);
+	path[dir_len] = '/';
+	strcpy(path + dir_len + 1, file);
+	return path;
+}
+
 int main(int argc, const char **argv)
 {
 	static const char *UNUSED = "OBJTOOL_NOT_IMPLEMENTED";
@@ -79,5 +113,11 @@ int main(int argc, const char **argv)
 	exec_cmd_init("objtool", UNUSED, UNUSED, UNUSED);
 	pager_init(UNUSED);
 
+	if (argc > 1 && !strcmp(argv[1], "klp")) {
+		argc--;
+		argv++;
+		return cmd_klp(argc, argv);
+	}
+
 	return objtool_run(argc, argv);
 }
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index 1dd9fc18fe62..5a979f52425a 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -8,7 +8,6 @@
 #include <objtool/objtool.h>
 #include <objtool/orc.h>
 #include <objtool/warn.h>
-#include <objtool/endianness.h>
 
 int orc_dump(const char *filename)
 {
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 922e6aac7cea..1045e1380ffd 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -12,7 +12,6 @@
 #include <objtool/check.h>
 #include <objtool/orc.h>
 #include <objtool/warn.h>
-#include <objtool/endianness.h>
 
 struct orc_list_entry {
 	struct list_head list;
@@ -57,7 +56,7 @@ int orc_create(struct objtool_file *file)
 
 	/* Build a deduplicated list of ORC entries: */
 	INIT_LIST_HEAD(&orc_list);
-	for_each_sec(file, sec) {
+	for_each_sec(file->elf, sec) {
 		struct orc_entry orc, prev_orc = {0};
 		struct instruction *insn;
 		bool empty = true;
@@ -127,7 +126,11 @@ int orc_create(struct objtool_file *file)
 		return -1;
 	}
 	orc_sec = elf_create_section(file->elf, ".orc_unwind",
-				     sizeof(struct orc_entry), nr);
+				     nr * sizeof(struct orc_entry),
+				     sizeof(struct orc_entry),
+				     SHT_PROGBITS,
+				     1,
+				     SHF_ALLOC);
 	if (!orc_sec)
 		return -1;
 
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index c80fed8a840e..2a533afbc69a 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -15,7 +15,6 @@
 #include <objtool/builtin.h>
 #include <objtool/special.h>
 #include <objtool/warn.h>
-#include <objtool/endianness.h>
 
 struct special_entry {
 	const char *sec;
@@ -82,6 +81,8 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry,
 						   entry->orig_len);
 		alt->new_len = *(unsigned char *)(sec->data->d_buf + offset +
 						  entry->new_len);
+		alt->feature = *(unsigned int *)(sec->data->d_buf + offset +
+						 entry->feature);
 	}
 
 	orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
@@ -133,7 +134,7 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
 	struct section *sec;
 	unsigned int nr_entries;
 	struct special_alt *alt;
-	int idx, ret;
+	int idx;
 
 	INIT_LIST_HEAD(alts);
 
@@ -142,12 +143,12 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
 		if (!sec)
 			continue;
 
-		if (sec->sh.sh_size % entry->size != 0) {
+		if (sec_size(sec) % entry->size != 0) {
 			ERROR("%s size not a multiple of %d", sec->name, entry->size);
 			return -1;
 		}
 
-		nr_entries = sec->sh.sh_size / entry->size;
+		nr_entries = sec_size(sec) / entry->size;
 
 		for (idx = 0; idx < nr_entries; idx++) {
 			alt = malloc(sizeof(*alt));
@@ -157,11 +158,8 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
 			}
 			memset(alt, 0, sizeof(*alt));
 
-			ret = get_alt_entry(elf, entry, sec, idx, alt);
-			if (ret > 0)
-				continue;
-			if (ret < 0)
-				return ret;
+			if (get_alt_entry(elf, entry, sec, idx, alt))
+				return -1;
 
 			list_add_tail(&alt->list, alts);
 		}
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 81d120d05442..e38167ca56a9 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -16,6 +16,8 @@ arch/x86/include/asm/orc_types.h
 arch/x86/include/asm/emulate_prefix.h
 arch/x86/lib/x86-opcode-map.txt
 arch/x86/tools/gen-insn-attr-x86.awk
+include/linux/interval_tree_generic.h
+include/linux/livepatch_external.h
 include/linux/static_call_types.h
 "
 
diff --git a/tools/objtool/trace.c b/tools/objtool/trace.c
new file mode 100644
index 000000000000..5dec44dab781
--- /dev/null
+++ b/tools/objtool/trace.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates.
+ */
+
+#include <objtool/trace.h>
+
+bool trace;
+int trace_depth;
+
+/*
+ * Macros to trace CFI state attributes changes.
+ */
+
+#define TRACE_CFI_ATTR(attr, prev, next, fmt, ...)		\
+({								\
+	if ((prev)->attr != (next)->attr)			\
+		TRACE("%s=" fmt " ", #attr, __VA_ARGS__);	\
+})
+
+#define TRACE_CFI_ATTR_BOOL(attr, prev, next)			\
+	TRACE_CFI_ATTR(attr, prev, next,			\
+		       "%s", (next)->attr ? "true" : "false")
+
+#define TRACE_CFI_ATTR_NUM(attr, prev, next, fmt)		\
+	TRACE_CFI_ATTR(attr, prev, next, fmt, (next)->attr)
+
+#define CFI_REG_NAME_MAXLEN   16
+
+/*
+ * Return the name of a register. Note that the same static buffer
+ * is returned if the name is dynamically generated.
+ */
+static const char *cfi_reg_name(unsigned int reg)
+{
+	static char rname_buffer[CFI_REG_NAME_MAXLEN];
+	const char *rname;
+
+	switch (reg) {
+	case CFI_UNDEFINED:
+		return "<undefined>";
+	case CFI_CFA:
+		return "cfa";
+	case CFI_SP_INDIRECT:
+		return "(sp)";
+	case CFI_BP_INDIRECT:
+		return "(bp)";
+	}
+	/* Prefer the arch name table; otherwise generate "r<N>" in the static buffer */
+	if (reg < CFI_NUM_REGS) {
+		rname = arch_reg_name[reg];
+		if (rname)
+			return rname;
+	}
+
+	if (snprintf(rname_buffer, CFI_REG_NAME_MAXLEN, "r%u", reg) < 0)
+		return "<error>";
+
+	return rname_buffer;
+}
+
+/*
+ * Functions and macros to trace CFI register changes.  Nothing is printed
+ * when base and offset are unchanged; @fmt gets (name, base name, offset).
+ */
+static void trace_cfi_reg(const char *prefix, int reg, const char *fmt,
+			  int base_prev, int offset_prev,
+			  int base_next, int offset_next)
+{
+	char *rname;
+
+	if (base_prev == base_next && offset_prev == offset_next)
+		return;
+
+	if (prefix)
+		TRACE("%s:", prefix);
+
+	if (base_next == CFI_UNDEFINED) {
+		TRACE("%1$s=<undef> ", cfi_reg_name(reg));
+	} else {
+		rname = strdup(cfi_reg_name(reg));	/* cfi_reg_name() may reuse a static buffer */
+		TRACE(fmt, rname ? rname : "<error>", cfi_reg_name(base_next), offset_next);
+		free(rname);
+	}
+}
+
+static void trace_cfi_reg_val(const char *prefix, int reg,
+			      int base_prev, int offset_prev,
+			      int base_next, int offset_next)
+{
+	trace_cfi_reg(prefix, reg, "%1$s=%2$s%3$+d ",
+		      base_prev, offset_prev, base_next, offset_next);
+}
+
+static void trace_cfi_reg_ref(const char *prefix, int reg,
+			      int base_prev, int offset_prev,
+			      int base_next, int offset_next)
+{
+	trace_cfi_reg(prefix, reg, "%1$s=(%2$s%3$+d) ",
+		      base_prev, offset_prev, base_next, offset_next);
+}
+
+#define TRACE_CFI_REG_VAL(reg, prev, next)				\
+	trace_cfi_reg_val(NULL, reg, prev.base, prev.offset,		\
+			  next.base, next.offset)
+
+#define TRACE_CFI_REG_REF(reg, prev, next)				\
+	trace_cfi_reg_ref(NULL, reg, prev.base, prev.offset,		\
+			  next.base, next.offset)
+
+void trace_insn_state(struct instruction *insn, struct insn_state *sprev,
+		      struct insn_state *snext)
+{
+	struct cfi_state *cprev, *cnext;
+	int i;
+	/* Byte-identical states: nothing changed, so nothing to trace */
+	if (!memcmp(sprev, snext, sizeof(struct insn_state)))
+		return;
+
+	cprev = &sprev->cfi;
+	cnext = &snext->cfi;
+
+	disas_print_insn(stderr, objtool_disas_ctx, insn,
+			 trace_depth - 1, "state: ");
+
+	/* print register changes */
+	TRACE_CFI_REG_VAL(CFI_CFA, cprev->cfa, cnext->cfa);
+	for (i = 0; i < CFI_NUM_REGS; i++) {
+		TRACE_CFI_REG_VAL(i, cprev->vals[i], cnext->vals[i]);
+		TRACE_CFI_REG_REF(i, cprev->regs[i], cnext->regs[i]);
+	}
+
+	/* print attribute changes */
+	TRACE_CFI_ATTR_NUM(stack_size, cprev, cnext, "%d");
+	TRACE_CFI_ATTR_BOOL(drap, cprev, cnext);
+	if (cnext->drap) {
+		trace_cfi_reg_val("drap", cnext->drap_reg,
+				  cprev->drap_reg, cprev->drap_offset,
+				  cnext->drap_reg, cnext->drap_offset);
+	}
+	TRACE_CFI_ATTR_BOOL(bp_scratch, cprev, cnext);
+	TRACE_CFI_ATTR_NUM(instr, sprev, snext, "%d");
+	TRACE_CFI_ATTR_NUM(uaccess_stack, sprev, snext, "%u");
+
+	TRACE("\n");
+
+	insn->trace = 1;
+}
+
+void trace_alt_begin(struct instruction *orig_insn, struct alternative *alt,
+		     char *alt_name)
+{
+	struct instruction *alt_insn;
+	char suffix[2] = "";	/* zeroed so it stays NUL-terminated for %s */
+
+	alt_insn = alt->insn;
+
+	if (alt->type == ALT_TYPE_EX_TABLE) {
+		/*
+		 * When there is an exception table then the instruction
+		 * at the original location is executed but it can cause
+		 * an exception. In that case, the execution will be
+		 * redirected to the alternative instruction.
+		 *
+		 * The instruction at the original location can have
+		 * instruction alternatives, so we just print the location
+		 * of the instruction that can cause the exception and
+		 * not the instruction itself.
+		 */
+		TRACE_ALT_INFO_NOADDR(orig_insn, "/ ", "%s for instruction at 0x%lx <%s+0x%lx>",
+				      alt_name,
+				      orig_insn->offset, orig_insn->sym->name,
+				      orig_insn->offset - orig_insn->sym->offset);
+	} else {
+		TRACE_ALT_INFO_NOADDR(orig_insn, "/ ", "%s", alt_name);
+	}
+
+	if (alt->type == ALT_TYPE_JUMP_TABLE) {
+		/*
+		 * For a jump alternative, if the default instruction is
+		 * a NOP then it is replaced with the jmp instruction,
+		 * otherwise it is replaced with a NOP instruction.
+		 */
+		trace_depth++;
+		if (orig_insn->type == INSN_NOP) {
+			suffix[0] = (orig_insn->len == 5) ? 'q' : '\0';
+			TRACE_ADDR(orig_insn, "jmp%-3s %lx <%s+0x%lx>", suffix,
+				   alt_insn->offset, alt_insn->sym->name,
+				   alt_insn->offset - alt_insn->sym->offset);
+		} else {
+			TRACE_ADDR(orig_insn, "nop%d", orig_insn->len);
+			trace_depth--;
+		}
+	}
+}
+
+void trace_alt_end(struct instruction *orig_insn, struct alternative *alt,
+		   char *alt_name)
+{
+	if (alt->type == ALT_TYPE_JUMP_TABLE && orig_insn->type == INSN_NOP)
+		trace_depth--;
+	TRACE_ALT_INFO_NOADDR(orig_insn, "\\ ", "%s", alt_name);
+}
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
index d83f607733b0..d6562f292259 100644
--- a/tools/objtool/weak.c
+++ b/tools/objtool/weak.c
@@ -8,6 +8,8 @@
 #include <stdbool.h>
 #include <errno.h>
 #include <objtool/objtool.h>
+#include <objtool/arch.h>
+#include <objtool/builtin.h>
 
 #define UNSUPPORTED(name)						\
 ({									\
@@ -24,3 +26,8 @@ int __weak orc_create(struct objtool_file *file)
 {
 	UNSUPPORTED("ORC");
 }
+
+int __weak cmd_klp(int argc, const char **argv)
+{
+	UNSUPPORTED("klp");
+}
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 5700516aa84a..2dd5f5a60568 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -354,9 +354,6 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
 
 FEATURE_CHECK_LDFLAGS-libaio = -lrt
 
-FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
-FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
-
 CORE_CFLAGS += -fno-omit-frame-pointer
 CORE_CFLAGS += -Wall
 CORE_CFLAGS += -Wextra
@@ -930,6 +927,8 @@ ifdef BUILD_NONDISTRO
 
   ifeq ($(feature-libbfd), 1)
     EXTLIBS += -lbfd -lopcodes
+    FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
+    FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
   else
     # we are on a system that requires -liberty and (maybe) -lz
     # to link against -lbfd; test each case individually here
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 47c906b807ef..02f87c49801f 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -234,12 +234,12 @@ endif
 # The fixdep build - we force fixdep tool to be built as
 # the first target in the separate make session not to be
 # disturbed by any parallel make jobs. Once fixdep is done
-# we issue the requested build with FIXDEP=1 variable.
+# we issue the requested build with FIXDEP_BUILT=1 variable.
 #
 # The fixdep build is disabled for $(NON_CONFIG_TARGETS)
 # targets, because it's not necessary.
 
-ifdef FIXDEP
+ifdef FIXDEP_BUILT
   force_fixdep := 0
 else
   force_fixdep := $(config)
@@ -286,7 +286,7 @@ $(goals) all: sub-make
 
 sub-make: fixdep
 	@./check-headers.sh
-	$(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals)
+	$(Q)$(MAKE) FIXDEP_BUILT=1 -f Makefile.perf $(goals)
 
 else # force_fixdep
 
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 92cf0fe2291e..ced2a1deecd7 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -345,6 +345,7 @@
 333	common	io_pgetevents		sys_io_pgetevents
 334	common	rseq			sys_rseq
 335	common	uretprobe		sys_uretprobe
+336	common	uprobe			sys_uprobe
 # don't use numbers 387 through 423, add new calls after the last
 # 'common' entry
 424	common	pidfd_send_signal	sys_pidfd_send_signal
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 078634461df2..e8962c985d34 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -1867,6 +1867,7 @@ static int __cmd_report(bool display_info)
 	eops.sample		 = process_sample_event;
 	eops.comm		 = perf_event__process_comm;
 	eops.mmap		 = perf_event__process_mmap;
+	eops.mmap2		 = perf_event__process_mmap2;
 	eops.namespaces		 = perf_event__process_namespaces;
 	eops.tracing_data	 = perf_event__process_tracing_data;
 	session = perf_session__new(&data, &eops);
@@ -2023,6 +2024,7 @@ static int __cmd_contention(int argc, const char **argv)
 	eops.sample		 = process_sample_event;
 	eops.comm		 = perf_event__process_comm;
 	eops.mmap		 = perf_event__process_mmap;
+	eops.mmap2		 = perf_event__process_mmap2;
 	eops.tracing_data	 = perf_event__process_tracing_data;
 
 	perf_env__init(&host_env);
diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh
index 7248a74ca2a3..6dd90519f45c 100755
--- a/tools/perf/tests/shell/lock_contention.sh
+++ b/tools/perf/tests/shell/lock_contention.sh
@@ -13,15 +13,18 @@ cleanup() {
 	rm -f ${perfdata}
 	rm -f ${result}
 	rm -f ${errout}
-	trap - EXIT TERM INT
+	trap - EXIT TERM INT ERR
 }
 
 trap_cleanup() {
+	if (( $? == 139 )); then #SIGSEGV
+		err=1
+	fi
 	echo "Unexpected signal in ${FUNCNAME[1]}"
 	cleanup
 	exit ${err}
 }
-trap trap_cleanup EXIT TERM INT
+trap trap_cleanup EXIT TERM INT ERR
 
 check() {
 	if [ "$(id -u)" != 0 ]; then
@@ -145,7 +148,7 @@ test_aggr_cgroup()
 	fi
 
 	# the perf lock contention output goes to the stderr
-	perf lock con -a -b -g -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result}
+	perf lock con -a -b --lock-cgroup -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result}
 	if [ "$(cat "${result}" | wc -l)" != "1" ]; then
 		echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)"
 		err=1
@@ -271,7 +274,7 @@ test_cgroup_filter()
 		return
 	fi
 
-	perf lock con -a -b -g -E 1 -F wait_total -q -- perf bench sched messaging -p > /dev/null 2> ${result}
+	perf lock con -a -b --lock-cgroup -E 1 -F wait_total -q -- perf bench sched messaging -p > /dev/null 2> ${result}
 	if [ "$(cat "${result}" | wc -l)" != "1" ]; then
 		echo "[Fail] BPF result should have a cgroup result:" "$(cat "${result}")"
 		err=1
@@ -279,7 +282,7 @@ test_cgroup_filter()
 	fi
 
 	cgroup=$(cat "${result}" | awk '{ print $3 }')
-	perf lock con -a -b -g -E 1 -G "${cgroup}" -q -- perf bench sched messaging -p > /dev/null 2> ${result}
+	perf lock con -a -b --lock-cgroup -E 1 -G "${cgroup}" -q -- perf bench sched messaging -p > /dev/null 2> ${result}
 	if [ "$(cat "${result}" | wc -l)" != "1" ]; then
 		echo "[Fail] BPF result should have a result with cgroup filter:" "$(cat "${cgroup}")"
 		err=1
@@ -338,4 +341,5 @@ test_aggr_task_stack_filter
 test_cgroup_filter
 test_csv_output
 
+cleanup
 exit ${err}
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index 3b262487ec06..77d7c59f5d8b 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -34,10 +34,7 @@ typedef __kernel_sa_family_t	sa_family_t;
 
 struct sockaddr {
 	sa_family_t	sa_family;	/* address family, AF_xxx	*/
-	union {
-		char sa_data_min[14];		/* Minimum 14 bytes of protocol address	*/
-		DECLARE_FLEX_ARRAY(char, sa_data);
-	};
+	char		sa_data[14];	/* 14 bytes of protocol address	*/
 };
 
 struct linger {
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
index f291ab4f94eb..3741ea1b73d8 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
@@ -111,6 +111,7 @@
 #define PIDFD_SELF_THREAD_GROUP		-10001 /* Current thread group leader. */
 
 #define FD_PIDFS_ROOT			-10002 /* Root of the pidfs filesystem */
+#define FD_NSFS_ROOT			-10003 /* Root of the nsfs filesystem */
 #define FD_INVALID			-10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */
 
 /* Generic flags for the *at(2) family of syscalls. */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index 0bd678a4a10e..beb4c2d1e41c 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -430,10 +430,13 @@ typedef int __bitwise __kernel_rwf_t;
 /* buffered IO that drops the cache after reading or writing data */
 #define RWF_DONTCACHE	((__force __kernel_rwf_t)0x00000080)
 
+/* prevent pipe and socket writes from raising SIGPIPE */
+#define RWF_NOSIGNAL	((__force __kernel_rwf_t)0x00000100)
+
 /* mask of flags supported by the kernel */
 #define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
 			 RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
-			 RWF_DONTCACHE)
+			 RWF_DONTCACHE | RWF_NOSIGNAL)
 
 #define PROCFS_IOCTL_MAGIC 'f'
 
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index ed3aed264aeb..51c4e8c82b1e 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -177,7 +177,17 @@ struct prctl_mm_map {
 
 #define PR_GET_TID_ADDRESS	40
 
+/*
+ * Flags for PR_SET_THP_DISABLE are only applicable when disabling. Bit 0
+ * is reserved, so PR_GET_THP_DISABLE can return "1 | flags", to effectively
+ * return "1" when no flags were specified for PR_SET_THP_DISABLE.
+ */
 #define PR_SET_THP_DISABLE	41
+/*
+ * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE /
+ * VM_HUGEPAGE, MADV_COLLAPSE).
+ */
+# define PR_THP_DISABLE_EXCEPT_ADVISED	(1 << 1)
 #define PR_GET_THP_DISABLE	42
 
 /*
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4f2a6e10ed5c..4e12be579140 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1022,12 +1022,9 @@ static int write_bpf_prog_info(struct feat_fd *ff,
 
 	down_read(&env->bpf_progs.lock);
 
-	if (env->bpf_progs.infos_cnt == 0)
-		goto out;
-
 	ret = do_write(ff, &env->bpf_progs.infos_cnt,
 		       sizeof(env->bpf_progs.infos_cnt));
-	if (ret < 0)
+	if (ret < 0 || env->bpf_progs.infos_cnt == 0)
 		goto out;
 
 	root = &env->bpf_progs.infos;
@@ -1067,13 +1064,10 @@ static int write_bpf_btf(struct feat_fd *ff,
 
 	down_read(&env->bpf_progs.lock);
 
-	if (env->bpf_progs.btfs_cnt == 0)
-		goto out;
-
 	ret = do_write(ff, &env->bpf_progs.btfs_cnt,
 		       sizeof(env->bpf_progs.btfs_cnt));
 
-	if (ret < 0)
+	if (ret < 0 || env->bpf_progs.btfs_cnt == 0)
 		goto out;
 
 	root = &env->bpf_progs.btfs;
diff --git a/tools/perf/util/libbfd.c b/tools/perf/util/libbfd.c
index 01147fbf73b3..6434c2dccd4a 100644
--- a/tools/perf/util/libbfd.c
+++ b/tools/perf/util/libbfd.c
@@ -38,6 +38,39 @@ struct a2l_data {
 	asymbol **syms;
 };
 
+static bool perf_bfd_lock(void *bfd_mutex)
+{
+	mutex_lock(bfd_mutex);
+	return true;
+}
+
+static bool perf_bfd_unlock(void *bfd_mutex)
+{
+	mutex_unlock(bfd_mutex);
+	return true;
+}
+
+static void perf_bfd_init(void)
+{
+	static struct mutex bfd_mutex;
+
+	mutex_init_recursive(&bfd_mutex);
+
+	if (bfd_init() != BFD_INIT_MAGIC) {
+		pr_err("Error initializing libbfd\n");
+		return;
+	}
+	if (!bfd_thread_init(perf_bfd_lock, perf_bfd_unlock, &bfd_mutex))
+		pr_err("Error initializing libbfd threading\n");
+}
+
+static void ensure_bfd_init(void)
+{
+	static pthread_once_t bfd_init_once = PTHREAD_ONCE_INIT;
+
+	pthread_once(&bfd_init_once, perf_bfd_init);
+}
+
 static int bfd_error(const char *string)
 {
 	const char *errmsg;
@@ -132,6 +165,7 @@ static struct a2l_data *addr2line_init(const char *path)
 	bfd *abfd;
 	struct a2l_data *a2l = NULL;
 
+	ensure_bfd_init();
 	abfd = bfd_openr(path, NULL);
 	if (abfd == NULL)
 		return NULL;
@@ -288,6 +322,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
 	bfd *abfd;
 	u64 start, len;
 
+	ensure_bfd_init();
 	abfd = bfd_openr(debugfile, NULL);
 	if (!abfd)
 		return -1;
@@ -393,6 +428,7 @@ int libbfd__read_build_id(const char *filename, struct build_id *bid, bool block
 	if (fd < 0)
 		return -1;
 
+	ensure_bfd_init();
 	abfd = bfd_fdopenr(filename, /*target=*/NULL, fd);
 	if (!abfd)
 		return -1;
@@ -421,6 +457,7 @@ int libbfd_filename__read_debuglink(const char *filename, char *debuglink,
 	asection *section;
 	bfd *abfd;
 
+	ensure_bfd_init();
 	abfd = bfd_openr(filename, NULL);
 	if (!abfd)
 		return -1;
@@ -480,6 +517,7 @@ int symbol__disassemble_bpf_libbfd(struct symbol *sym __maybe_unused,
 	memset(tpath, 0, sizeof(tpath));
 	perf_exe(tpath, sizeof(tpath));
 
+	ensure_bfd_init();
 	bfdf = bfd_openr(tpath, NULL);
 	if (bfdf == NULL)
 		abort();
diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c
index bca7f0717f35..7aa1f3f55a7d 100644
--- a/tools/perf/util/mutex.c
+++ b/tools/perf/util/mutex.c
@@ -17,7 +17,7 @@ static void check_err(const char *fn, int err)
 
 #define CHECK_ERR(err) check_err(__func__, err)
 
-static void __mutex_init(struct mutex *mtx, bool pshared)
+static void __mutex_init(struct mutex *mtx, bool pshared, bool recursive)
 {
 	pthread_mutexattr_t attr;
 
@@ -27,21 +27,27 @@ static void __mutex_init(struct mutex *mtx, bool pshared)
 	/* In normal builds enable error checking, such as recursive usage. */
 	CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK));
 #endif
+	if (recursive)
+		CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE));
 	if (pshared)
 		CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED));
-
 	CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr));
 	CHECK_ERR(pthread_mutexattr_destroy(&attr));
 }
 
 void mutex_init(struct mutex *mtx)
 {
-	__mutex_init(mtx, /*pshared=*/false);
+	__mutex_init(mtx, /*pshared=*/false, /*recursive=*/false);
 }
 
 void mutex_init_pshared(struct mutex *mtx)
 {
-	__mutex_init(mtx, /*pshared=*/true);
+	__mutex_init(mtx, /*pshared=*/true, /*recursive=*/false);
+}
+
+void mutex_init_recursive(struct mutex *mtx)
+{
+	__mutex_init(mtx, /*pshared=*/false, /*recursive=*/true);
 }
 
 void mutex_destroy(struct mutex *mtx)
diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h
index 38458f00846f..70232d8d094f 100644
--- a/tools/perf/util/mutex.h
+++ b/tools/perf/util/mutex.h
@@ -104,6 +104,8 @@ void mutex_init(struct mutex *mtx);
  * process-private attribute.
  */
 void mutex_init_pshared(struct mutex *mtx);
+/* Initializes a mutex that may be recursively held on the same thread. */
+void mutex_init_recursive(struct mutex *mtx);
 void mutex_destroy(struct mutex *mtx);
 
 void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx);
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index cc26b7bf302b..948d3e8ad782 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -112,9 +112,13 @@ static bool symbol_type__filter(char __symbol_type)
 	// 'N' first seen in:
 	// ffffffff9b35d130 N __pfx__RNCINvNtNtNtCsbDUBuN8AbD4_4core4iter8adapters3map12map_try_foldjNtCs6vVzKs5jPr6_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_
 	// a seemingly Rust mangled name
+	// Ditto for '1':
+	// root@x1:~# grep ' 1 ' /proc/kallsyms
+	// ffffffffb098bc00 1 __pfx__RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_
+	// ffffffffb098bc10 1 _RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_
 	char symbol_type = toupper(__symbol_type);
 	return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B' ||
-	       __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N';
+	       __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N' || __symbol_type == '1';
 }
 
 static int prefix_underscores_count(const char *str)
diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c
index 44a9ecbd91e8..4d9b0177c312 100644
--- a/tools/power/acpi/tools/pfrut/pfrut.c
+++ b/tools/power/acpi/tools/pfrut/pfrut.c
@@ -222,6 +222,7 @@ int main(int argc, char *argv[])
 	fd_update_log = open("/dev/acpi_pfr_telemetry0", O_RDWR);
 	if (fd_update_log < 0) {
 		printf("PFRT device not supported - Quit...\n");
+		close(fd_update);
 		return 1;
 	}
 
@@ -265,7 +266,8 @@ int main(int argc, char *argv[])
 		printf("chunk2_size:%d\n", data_info.chunk2_size);
 		printf("rollover_cnt:%d\n", data_info.rollover_cnt);
 		printf("reset_cnt:%d\n", data_info.reset_cnt);
-
+		close(fd_update);
+		close(fd_update_log);
 		return 0;
 	}
 
@@ -358,6 +360,7 @@ int main(int argc, char *argv[])
 
 		if (ret == -1) {
 			perror("Failed to load capsule file");
+			munmap(addr_map_capsule, st.st_size);
 			close(fd_capsule);
 			close(fd_update);
 			close(fd_update_log);
@@ -420,7 +423,7 @@ int main(int argc, char *argv[])
 		if (p_mmap == MAP_FAILED) {
 			perror("mmap error.");
 			close(fd_update_log);
-
+			free(log_buf);
 			return 1;
 		}
 
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index c43db1c41205..a1df9196dc45 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -37,9 +37,7 @@ NLS ?=		true
 # cpufreq-bench benchmarking tool
 CPUFREQ_BENCH ?= true
 
-# Do not build libraries, but build the code in statically
-# Libraries are still built, otherwise the Makefile code would
-# be rather ugly.
+# Build the code, including libraries, statically.
 export STATIC ?= false
 
 # Prefix to the directories we're installing to
@@ -207,14 +205,25 @@ $(OUTPUT)lib/%.o: $(LIB_SRC) $(LIB_HEADERS)
 	$(ECHO) "  CC      " $@
 	$(QUIET) $(CC) $(CFLAGS) -fPIC -o $@ -c lib/$*.c
 
-$(OUTPUT)libcpupower.so.$(LIB_VER): $(LIB_OBJS)
+ifeq ($(strip $(STATIC)),true)
+LIBCPUPOWER := libcpupower.a
+else
+LIBCPUPOWER := libcpupower.so.$(LIB_VER)
+endif
+
+$(OUTPUT)$(LIBCPUPOWER): $(LIB_OBJS)
+ifeq ($(strip $(STATIC)),true)
+	$(ECHO) "  AR      " $@
+	$(QUIET) $(AR) rcs $@ $(LIB_OBJS)
+else
 	$(ECHO) "  LD      " $@
 	$(QUIET) $(CC) -shared $(CFLAGS) $(LDFLAGS) -o $@ \
 		-Wl,-soname,libcpupower.so.$(LIB_MAJ) $(LIB_OBJS)
 	@ln -sf $(@F) $(OUTPUT)libcpupower.so
 	@ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+endif
 
-libcpupower: $(OUTPUT)libcpupower.so.$(LIB_VER)
+libcpupower: $(OUTPUT)$(LIBCPUPOWER)
 
 # Let all .o files depend on its .c file and all headers
 # Might be worth to put this into utils/Makefile at some point of time
@@ -224,7 +233,7 @@ $(OUTPUT)%.o: %.c
 	$(ECHO) "  CC      " $@
 	$(QUIET) $(CC) $(CFLAGS) -I./lib -I ./utils -o $@ -c $*.c
 
-$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_VER)
+$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)$(LIBCPUPOWER)
 	$(ECHO) "  CC      " $@
 ifeq ($(strip $(STATIC)),true)
 	$(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lrt -lpci -L$(OUTPUT) -o $@
@@ -269,7 +278,7 @@ update-po: $(OUTPUT)po/$(PACKAGE).pot
 	done;
 endif
 
-compile-bench: $(OUTPUT)libcpupower.so.$(LIB_VER)
+compile-bench: $(OUTPUT)$(LIBCPUPOWER)
 	@V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT)
 
 # we compile into subdirectories. if the target directory is not the
@@ -287,6 +296,7 @@ clean:
 	-find $(OUTPUT) \( -not -type d \) -and \( -name '*~' -o -name '*.[oas]' \) -type f -print \
 	 | xargs rm -f
 	-rm -f $(OUTPUT)cpupower
+	-rm -f $(OUTPUT)libcpupower.a
 	-rm -f $(OUTPUT)libcpupower.so*
 	-rm -rf $(OUTPUT)po/*.gmo
 	-rm -rf $(OUTPUT)po/*.pot
@@ -295,7 +305,11 @@ clean:
 
 install-lib: libcpupower
 	$(INSTALL) -d $(DESTDIR)${libdir}
+ifeq ($(strip $(STATIC)),true)
+	$(CP) $(OUTPUT)libcpupower.a $(DESTDIR)${libdir}/
+else
 	$(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/
+endif
 	$(INSTALL) -d $(DESTDIR)${includedir}
 	$(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h
 	$(INSTALL_DATA) lib/cpuidle.h $(DESTDIR)${includedir}/cpuidle.h
@@ -336,11 +350,7 @@ install-bench: compile-bench
 	@#DESTDIR must be set from outside to survive
 	@sbindir=$(sbindir) bindir=$(bindir) docdir=$(docdir) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) install
 
-ifeq ($(strip $(STATIC)),true)
-install: all install-tools install-man $(INSTALL_NLS) $(INSTALL_BENCH)
-else
 install: all install-lib install-tools install-man $(INSTALL_NLS) $(INSTALL_BENCH)
-endif
 
 uninstall:
 	- rm -f $(DESTDIR)${libdir}/libcpupower.*
diff --git a/tools/sched_ext/Makefile b/tools/sched_ext/Makefile
index d68780e2e03d..e4bda2474060 100644
--- a/tools/sched_ext/Makefile
+++ b/tools/sched_ext/Makefile
@@ -133,6 +133,7 @@ $(MAKE_DIRS):
 	$(call msg,MKDIR,,$@)
 	$(Q)mkdir -p $@
 
+ifneq ($(CROSS_COMPILE),)
 $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)			\
 	   $(APIDIR)/linux/bpf.h						\
 	   | $(OBJ_DIR)/libbpf
@@ -141,6 +142,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)			\
 		    EXTRA_CFLAGS='-g -O0 -fPIC'					\
 		    LDFLAGS="$(LDFLAGS)"					\
 		    DESTDIR=$(OUTPUT_DIR) prefix= all install_headers
+endif
 
 $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		\
 	   $(APIDIR)/linux/bpf.h						\
@@ -187,7 +189,7 @@ $(INCLUDE_DIR)/%.bpf.skel.h: $(SCXOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BP
 
 SCX_COMMON_DEPS := include/scx/common.h include/scx/user_exit_info.h | $(BINDIR)
 
-c-sched-targets = scx_simple scx_qmap scx_central scx_flatcg
+c-sched-targets = scx_simple scx_cpu0 scx_qmap scx_central scx_flatcg
 
 $(addprefix $(BINDIR)/,$(c-sched-targets)): \
 	$(BINDIR)/%: \
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index 06e2551033cb..821d5791bd42 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -60,21 +60,15 @@ static inline void ___vmlinux_h_sanity_check___(void)
 
 s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
 s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym;
-s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
-			   const struct cpumask *cpus_allowed, u64 flags) __ksym __weak;
-void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
-void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
+s32 __scx_bpf_select_cpu_and(struct task_struct *p, const struct cpumask *cpus_allowed,
+			     struct scx_bpf_select_cpu_and_args *args) __ksym __weak;
+bool __scx_bpf_dsq_insert_vtime(struct task_struct *p, struct scx_bpf_dsq_insert_vtime_args *args) __ksym __weak;
 u32 scx_bpf_dispatch_nr_slots(void) __ksym;
 void scx_bpf_dispatch_cancel(void) __ksym;
-bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym __weak;
-void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
-void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
-bool scx_bpf_dsq_move(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
-bool scx_bpf_dsq_move_vtime(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
-u32 scx_bpf_reenqueue_local(void) __ksym;
 void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym;
 s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym;
 void scx_bpf_destroy_dsq(u64 dsq_id) __ksym;
+struct task_struct *scx_bpf_dsq_peek(u64 dsq_id) __ksym __weak;
 int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id, u64 flags) __ksym __weak;
 struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) __ksym __weak;
 void bpf_iter_scx_dsq_destroy(struct bpf_iter_scx_dsq *it) __ksym __weak;
@@ -105,7 +99,6 @@ s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
 struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
 struct rq *scx_bpf_locked_rq(void) __ksym;
 struct task_struct *scx_bpf_cpu_curr(s32 cpu) __ksym __weak;
-struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
 u64 scx_bpf_now(void) __ksym __weak;
 void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __weak;
 
diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
index dd9144624dc9..f2969c3061a7 100644
--- a/tools/sched_ext/include/scx/compat.bpf.h
+++ b/tools/sched_ext/include/scx/compat.bpf.h
@@ -16,119 +16,92 @@
 })
 
 /* v6.12: 819513666966 ("sched_ext: Add cgroup support") */
-#define __COMPAT_scx_bpf_task_cgroup(p)						\
-	(bpf_ksym_exists(scx_bpf_task_cgroup) ?					\
-	 scx_bpf_task_cgroup((p)) : NULL)
+struct cgroup *scx_bpf_task_cgroup___new(struct task_struct *p) __ksym __weak;
+
+#define scx_bpf_task_cgroup(p)							\
+	(bpf_ksym_exists(scx_bpf_task_cgroup___new) ?				\
+	 scx_bpf_task_cgroup___new((p)) : NULL)
 
 /*
  * v6.13: The verb `dispatch` was too overloaded and confusing. kfuncs are
  * renamed to unload the verb.
  *
- * Build error is triggered if old names are used. New binaries work with both
- * new and old names. The compat macros will be removed on v6.15 release.
- *
  * scx_bpf_dispatch_from_dsq() and friends were added during v6.12 by
  * 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()").
- * Preserve __COMPAT macros until v6.15.
  */
-void scx_bpf_dispatch___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
-void scx_bpf_dispatch_vtime___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
-bool scx_bpf_consume___compat(u64 dsq_id) __ksym __weak;
-void scx_bpf_dispatch_from_dsq_set_slice___compat(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
-void scx_bpf_dispatch_from_dsq_set_vtime___compat(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
-bool scx_bpf_dispatch_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
-bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
-int bpf_cpumask_populate(struct cpumask *dst, void *src, size_t src__sz) __ksym __weak;
-
-#define scx_bpf_dsq_insert(p, dsq_id, slice, enq_flags)				\
-	(bpf_ksym_exists(scx_bpf_dsq_insert) ?					\
-	 scx_bpf_dsq_insert((p), (dsq_id), (slice), (enq_flags)) :		\
-	 scx_bpf_dispatch___compat((p), (dsq_id), (slice), (enq_flags)))
-
-#define scx_bpf_dsq_insert_vtime(p, dsq_id, slice, vtime, enq_flags)		\
-	(bpf_ksym_exists(scx_bpf_dsq_insert_vtime) ?				\
-	 scx_bpf_dsq_insert_vtime((p), (dsq_id), (slice), (vtime), (enq_flags)) : \
-	 scx_bpf_dispatch_vtime___compat((p), (dsq_id), (slice), (vtime), (enq_flags)))
+bool scx_bpf_dsq_move_to_local___new(u64 dsq_id) __ksym __weak;
+void scx_bpf_dsq_move_set_slice___new(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
+void scx_bpf_dsq_move_set_vtime___new(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
+bool scx_bpf_dsq_move___new(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
+bool scx_bpf_dsq_move_vtime___new(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
+
+bool scx_bpf_consume___old(u64 dsq_id) __ksym __weak;
+void scx_bpf_dispatch_from_dsq_set_slice___old(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
+void scx_bpf_dispatch_from_dsq_set_vtime___old(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
+bool scx_bpf_dispatch_from_dsq___old(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
+bool scx_bpf_dispatch_vtime_from_dsq___old(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
 
 #define scx_bpf_dsq_move_to_local(dsq_id)					\
-	(bpf_ksym_exists(scx_bpf_dsq_move_to_local) ?				\
-	 scx_bpf_dsq_move_to_local((dsq_id)) :					\
-	 scx_bpf_consume___compat((dsq_id)))
-
-#define __COMPAT_scx_bpf_dsq_move_set_slice(it__iter, slice)			\
-	(bpf_ksym_exists(scx_bpf_dsq_move_set_slice) ?				\
-	 scx_bpf_dsq_move_set_slice((it__iter), (slice)) :			\
-	 (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_slice___compat) ?	\
-	  scx_bpf_dispatch_from_dsq_set_slice___compat((it__iter), (slice)) :	\
+	(bpf_ksym_exists(scx_bpf_dsq_move_to_local___new) ?			\
+	 scx_bpf_dsq_move_to_local___new((dsq_id)) :				\
+	 scx_bpf_consume___old((dsq_id)))
+
+#define scx_bpf_dsq_move_set_slice(it__iter, slice)				\
+	(bpf_ksym_exists(scx_bpf_dsq_move_set_slice___new) ?			\
+	 scx_bpf_dsq_move_set_slice___new((it__iter), (slice)) :		\
+	 (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_slice___old) ?		\
+	  scx_bpf_dispatch_from_dsq_set_slice___old((it__iter), (slice)) :	\
+	  (void)0))
+
+#define scx_bpf_dsq_move_set_vtime(it__iter, vtime)				\
+	(bpf_ksym_exists(scx_bpf_dsq_move_set_vtime___new) ?			\
+	 scx_bpf_dsq_move_set_vtime___new((it__iter), (vtime)) :		\
+	 (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_vtime___old) ?		\
+	  scx_bpf_dispatch_from_dsq_set_vtime___old((it__iter), (vtime)) :	\
 	  (void)0))
 
-#define __COMPAT_scx_bpf_dsq_move_set_vtime(it__iter, vtime)			\
-	(bpf_ksym_exists(scx_bpf_dsq_move_set_vtime) ?				\
-	 scx_bpf_dsq_move_set_vtime((it__iter), (vtime)) :			\
-	 (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_vtime___compat) ?	\
-	  scx_bpf_dispatch_from_dsq_set_vtime___compat((it__iter), (vtime)) :	\
-	  (void) 0))
-
-#define __COMPAT_scx_bpf_dsq_move(it__iter, p, dsq_id, enq_flags)		\
-	(bpf_ksym_exists(scx_bpf_dsq_move) ?					\
-	 scx_bpf_dsq_move((it__iter), (p), (dsq_id), (enq_flags)) :		\
-	 (bpf_ksym_exists(scx_bpf_dispatch_from_dsq___compat) ?			\
-	  scx_bpf_dispatch_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \
+#define scx_bpf_dsq_move(it__iter, p, dsq_id, enq_flags)			\
+	(bpf_ksym_exists(scx_bpf_dsq_move___new) ?				\
+	 scx_bpf_dsq_move___new((it__iter), (p), (dsq_id), (enq_flags)) :	\
+	 (bpf_ksym_exists(scx_bpf_dispatch_from_dsq___old) ?			\
+	  scx_bpf_dispatch_from_dsq___old((it__iter), (p), (dsq_id), (enq_flags)) : \
 	  false))
 
-#define __COMPAT_scx_bpf_dsq_move_vtime(it__iter, p, dsq_id, enq_flags)		\
-	(bpf_ksym_exists(scx_bpf_dsq_move_vtime) ?				\
-	 scx_bpf_dsq_move_vtime((it__iter), (p), (dsq_id), (enq_flags)) :	\
-	 (bpf_ksym_exists(scx_bpf_dispatch_vtime_from_dsq___compat) ?		\
-	  scx_bpf_dispatch_vtime_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \
+#define scx_bpf_dsq_move_vtime(it__iter, p, dsq_id, enq_flags)			\
+	(bpf_ksym_exists(scx_bpf_dsq_move_vtime___new) ?			\
+	 scx_bpf_dsq_move_vtime___new((it__iter), (p), (dsq_id), (enq_flags)) : \
+	 (bpf_ksym_exists(scx_bpf_dispatch_vtime_from_dsq___old) ?		\
+	  scx_bpf_dispatch_vtime_from_dsq___old((it__iter), (p), (dsq_id), (enq_flags)) : \
 	  false))
 
+/*
+ * v6.15: 950ad93df2fc ("bpf: add kfunc for populating cpumask bits")
+ *
+ * Compat macro will be dropped on v6.19 release.
+ */
+int bpf_cpumask_populate(struct cpumask *dst, void *src, size_t src__sz) __ksym __weak;
+
 #define __COMPAT_bpf_cpumask_populate(cpumask, src, size__sz)		\
 	(bpf_ksym_exists(bpf_cpumask_populate) ?			\
 	 (bpf_cpumask_populate(cpumask, src, size__sz)) : -EOPNOTSUPP)
 
-#define scx_bpf_dispatch(p, dsq_id, slice, enq_flags)				\
-	_Static_assert(false, "scx_bpf_dispatch() renamed to scx_bpf_dsq_insert()")
-
-#define scx_bpf_dispatch_vtime(p, dsq_id, slice, vtime, enq_flags)		\
-	_Static_assert(false, "scx_bpf_dispatch_vtime() renamed to scx_bpf_dsq_insert_vtime()")
-
-#define scx_bpf_consume(dsq_id) ({						\
-	_Static_assert(false, "scx_bpf_consume() renamed to scx_bpf_dsq_move_to_local()"); \
-	false;									\
-})
-
-#define scx_bpf_dispatch_from_dsq_set_slice(it__iter, slice)		\
-	_Static_assert(false, "scx_bpf_dispatch_from_dsq_set_slice() renamed to scx_bpf_dsq_move_set_slice()")
-
-#define scx_bpf_dispatch_from_dsq_set_vtime(it__iter, vtime)		\
-	_Static_assert(false, "scx_bpf_dispatch_from_dsq_set_vtime() renamed to scx_bpf_dsq_move_set_vtime()")
-
-#define scx_bpf_dispatch_from_dsq(it__iter, p, dsq_id, enq_flags) ({	\
-	_Static_assert(false, "scx_bpf_dispatch_from_dsq() renamed to scx_bpf_dsq_move()"); \
-	false;									\
-})
-
-#define scx_bpf_dispatch_vtime_from_dsq(it__iter, p, dsq_id, enq_flags) ({  \
-	_Static_assert(false, "scx_bpf_dispatch_vtime_from_dsq() renamed to scx_bpf_dsq_move_vtime()"); \
-	false;									\
-})
-
-#define __COMPAT_scx_bpf_dispatch_from_dsq_set_slice(it__iter, slice)		\
-	_Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq_set_slice() renamed to __COMPAT_scx_bpf_dsq_move_set_slice()")
-
-#define __COMPAT_scx_bpf_dispatch_from_dsq_set_vtime(it__iter, vtime)		\
-	_Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq_set_vtime() renamed to __COMPAT_scx_bpf_dsq_move_set_vtime()")
-
-#define __COMPAT_scx_bpf_dispatch_from_dsq(it__iter, p, dsq_id, enq_flags) ({	\
-	_Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq() renamed to __COMPAT_scx_bpf_dsq_move()"); \
-	false;									\
-})
-
-#define __COMPAT_scx_bpf_dispatch_vtime_from_dsq(it__iter, p, dsq_id, enq_flags) ({  \
-	_Static_assert(false, "__COMPAT_scx_bpf_dispatch_vtime_from_dsq() renamed to __COMPAT_scx_bpf_dsq_move_vtime()"); \
-	false;									\
-})
+/*
+ * v6.19: Introduce lockless peek API for user DSQs.
+ *
+ * Preserve the following helper until v6.21.
+ */
+static inline struct task_struct *__COMPAT_scx_bpf_dsq_peek(u64 dsq_id)
+{
+	struct task_struct *p = NULL;
+	struct bpf_iter_scx_dsq it;
+
+	if (bpf_ksym_exists(scx_bpf_dsq_peek))
+		return scx_bpf_dsq_peek(dsq_id);
+	if (!bpf_iter_scx_dsq_new(&it, dsq_id, 0))
+		p = bpf_iter_scx_dsq_next(&it);
+	bpf_iter_scx_dsq_destroy(&it);
+	return p;
+}
 
 /**
  * __COMPAT_is_enq_cpu_selected - Test if SCX_ENQ_CPU_SELECTED is on
@@ -248,6 +221,161 @@ static inline struct task_struct *__COMPAT_scx_bpf_cpu_curr(int cpu)
 }
 
 /*
+ * v6.19: To work around BPF maximum parameter limit, the following kfuncs are
+ * replaced with variants that pack scalar arguments in a struct. Wrappers are
+ * provided to maintain source compatibility.
+ *
+ * v6.13: scx_bpf_dsq_insert_vtime() renaming is also handled here. See the
+ * block on dispatch renaming above for more details.
+ *
+ * The kernel will carry the compat variants until v6.23 to maintain binary
+ * compatibility. After v6.23 release, remove the compat handling and move the
+ * wrappers to common.bpf.h.
+ */
+s32 scx_bpf_select_cpu_and___compat(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+				    const struct cpumask *cpus_allowed, u64 flags) __ksym __weak;
+void scx_bpf_dispatch_vtime___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
+void scx_bpf_dsq_insert_vtime___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
+
+/**
+ * scx_bpf_select_cpu_and - Pick an idle CPU usable by task @p
+ * @p: task_struct to select a CPU for
+ * @prev_cpu: CPU @p was on previously
+ * @wake_flags: %SCX_WAKE_* flags
+ * @cpus_allowed: cpumask of allowed CPUs
+ * @flags: %SCX_PICK_IDLE* flags
+ *
+ * Inline wrapper that packs scalar arguments into a struct and calls
+ * __scx_bpf_select_cpu_and(). See __scx_bpf_select_cpu_and() for details.
+ */
+static inline s32
+scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+		       const struct cpumask *cpus_allowed, u64 flags)
+{
+	if (bpf_core_type_exists(struct scx_bpf_select_cpu_and_args)) {
+		struct scx_bpf_select_cpu_and_args args = {
+			.prev_cpu = prev_cpu,
+			.wake_flags = wake_flags,
+			.flags = flags,
+		};
+
+		return __scx_bpf_select_cpu_and(p, cpus_allowed, &args);
+	} else {
+		return scx_bpf_select_cpu_and___compat(p, prev_cpu, wake_flags,
+						       cpus_allowed, flags);
+	}
+}
+
+/**
+ * scx_bpf_dsq_insert_vtime - Insert a task into the vtime priority queue of a DSQ
+ * @p: task_struct to insert
+ * @dsq_id: DSQ to insert into
+ * @slice: duration @p can run for in nsecs, 0 to keep the current value
+ * @vtime: @p's ordering inside the vtime-sorted queue of the target DSQ
+ * @enq_flags: SCX_ENQ_*
+ *
+ * Inline wrapper that packs scalar arguments into a struct and calls
+ * __scx_bpf_dsq_insert_vtime(). See __scx_bpf_dsq_insert_vtime() for details.
+ */
+static inline bool
+scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime,
+			 u64 enq_flags)
+{
+	if (bpf_core_type_exists(struct scx_bpf_dsq_insert_vtime_args)) {
+		struct scx_bpf_dsq_insert_vtime_args args = {
+			.dsq_id = dsq_id,
+			.slice = slice,
+			.vtime = vtime,
+			.enq_flags = enq_flags,
+		};
+
+		return __scx_bpf_dsq_insert_vtime(p, &args);
+	} else if (bpf_ksym_exists(scx_bpf_dsq_insert_vtime___compat)) {
+		scx_bpf_dsq_insert_vtime___compat(p, dsq_id, slice, vtime,
+						  enq_flags);
+		return true;
+	} else {
+		scx_bpf_dispatch_vtime___compat(p, dsq_id, slice, vtime,
+						enq_flags);
+		return true;
+	}
+}
+
+/*
+ * v6.19: scx_bpf_dsq_insert() now returns bool instead of void. Move
+ * scx_bpf_dsq_insert() decl to common.bpf.h and drop compat helper after v6.22.
+ * The extra ___compat suffix is to work around libbpf not ignoring ___SUFFIX on
+ * kernel side. The entire suffix can be dropped later.
+ *
+ * v6.13: scx_bpf_dsq_insert() renaming is also handled here. See the block on
+ * dispatch renaming above for more details.
+ */
+bool scx_bpf_dsq_insert___v2___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
+void scx_bpf_dsq_insert___v1(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
+void scx_bpf_dispatch___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
+
+static inline bool
+scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags)
+{
+	if (bpf_ksym_exists(scx_bpf_dsq_insert___v2___compat)) {
+		return scx_bpf_dsq_insert___v2___compat(p, dsq_id, slice, enq_flags);
+	} else if (bpf_ksym_exists(scx_bpf_dsq_insert___v1)) {
+		scx_bpf_dsq_insert___v1(p, dsq_id, slice, enq_flags);
+		return true;
+	} else {
+		scx_bpf_dispatch___compat(p, dsq_id, slice, enq_flags);
+		return true;
+	}
+}
+
+/*
+ * v6.19: scx_bpf_task_set_slice() and scx_bpf_task_set_dsq_vtime() added for
+ * sub-sched authority checks. Drop the wrappers and move the decls to
+ * common.bpf.h after v6.22.
+ */
+bool scx_bpf_task_set_slice___new(struct task_struct *p, u64 slice) __ksym __weak;
+bool scx_bpf_task_set_dsq_vtime___new(struct task_struct *p, u64 vtime) __ksym __weak;
+
+static inline void scx_bpf_task_set_slice(struct task_struct *p, u64 slice)
+{
+	if (bpf_ksym_exists(scx_bpf_task_set_slice___new))
+		scx_bpf_task_set_slice___new(p, slice);
+	else
+		p->scx.slice = slice;
+}
+
+static inline void scx_bpf_task_set_dsq_vtime(struct task_struct *p, u64 vtime)
+{
+	if (bpf_ksym_exists(scx_bpf_task_set_dsq_vtime___new))
+		scx_bpf_task_set_dsq_vtime___new(p, vtime);
+	else
+		p->scx.dsq_vtime = vtime;
+}
+
+/*
+ * v6.19: The new void variant can be called from anywhere while the older v1
+ * variant can only be called from ops.cpu_release(). The double ___ suffixes on
+ * the v2 variant need to be removed once libbpf is updated to ignore ___ suffix
+ * on kernel side. Drop the wrapper and move the decl to common.bpf.h after
+ * v6.22.
+ */
+u32 scx_bpf_reenqueue_local___v1(void) __ksym __weak;
+void scx_bpf_reenqueue_local___v2___compat(void) __ksym __weak;
+
+static inline bool __COMPAT_scx_bpf_reenqueue_local_from_anywhere(void)
+{
+	return bpf_ksym_exists(scx_bpf_reenqueue_local___v2___compat);
+}
+
+static inline void scx_bpf_reenqueue_local(void)
+{
+	if (__COMPAT_scx_bpf_reenqueue_local_from_anywhere())
+		scx_bpf_reenqueue_local___v2___compat();
+	else
+		scx_bpf_reenqueue_local___v1();
+}
+
+/*
  * Define sched_ext_ops. This may be expanded to define multiple variants for
  * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
  */
diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h
index 35c67c5174ac..8b4897fc8b99 100644
--- a/tools/sched_ext/include/scx/compat.h
+++ b/tools/sched_ext/include/scx/compat.h
@@ -151,6 +151,10 @@ static inline long scx_hotplug_seq(void)
  *
  * ec7e3b0463e1 ("implement-ops") in https://github.com/sched-ext/sched_ext is
  * the current minimum required kernel version.
+ *
+ * COMPAT:
+ * - v6.17: ops.cgroup_set_bandwidth()
+ * - v6.19: ops.cgroup_set_idle()
  */
 #define SCX_OPS_OPEN(__ops_name, __scx_name) ({					\
 	struct __scx_name *__skel;						\
@@ -162,6 +166,16 @@ static inline long scx_hotplug_seq(void)
 	SCX_BUG_ON(!__skel, "Could not open " #__scx_name);			\
 	__skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq();		\
 	SCX_ENUM_INIT(__skel);							\
+	if (__skel->struct_ops.__ops_name->cgroup_set_bandwidth &&		\
+	    !__COMPAT_struct_has_field("sched_ext_ops", "cgroup_set_bandwidth")) { \
+		fprintf(stderr, "WARNING: kernel doesn't support ops.cgroup_set_bandwidth()\n"); \
+		__skel->struct_ops.__ops_name->cgroup_set_bandwidth = NULL;	\
+	}									\
+	if (__skel->struct_ops.__ops_name->cgroup_set_idle &&			\
+	    !__COMPAT_struct_has_field("sched_ext_ops", "cgroup_set_idle")) { \
+		fprintf(stderr, "WARNING: kernel doesn't support ops.cgroup_set_idle()\n"); \
+		__skel->struct_ops.__ops_name->cgroup_set_idle = NULL;	\
+	}									\
 	__skel; 								\
 })
 
diff --git a/tools/sched_ext/scx_cpu0.bpf.c b/tools/sched_ext/scx_cpu0.bpf.c
new file mode 100644
index 000000000000..6326ce598c8e
--- /dev/null
+++ b/tools/sched_ext/scx_cpu0.bpf.c
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * A CPU0 scheduler.
+ *
+ * This scheduler queues all tasks to a shared DSQ and only dispatches them on
+ * CPU0 in FIFO order. This is useful for testing bypass behavior when many
+ * tasks are concentrated on a single CPU. If the load balancer doesn't work,
+ * bypass mode can trigger task hangs or RCU stalls as the queue is long and
+ * there's only one CPU working on it.
+ *
+ * - Statistics tracking how many tasks are queued to local and CPU0 DSQs.
+ * - Termination notification for userspace.
+ *
+ * Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2025 Tejun Heo <tj@kernel.org>
+ */
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+const volatile u32 nr_cpus = 32;	/* !0 for veristat, set during init */
+
+UEI_DEFINE(uei);
+
+/*
+ * We create a custom DSQ with ID 0 that we dispatch to and consume from on
+ * CPU0.
+ */
+#define DSQ_CPU0 0
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, sizeof(u64));
+	__uint(max_entries, 2);			/* [local, cpu0] */
+} stats SEC(".maps");
+
+static void stat_inc(u32 idx)
+{
+	u64 *cnt_p = bpf_map_lookup_elem(&stats, &idx);
+	if (cnt_p)
+		(*cnt_p)++;
+}
+
+s32 BPF_STRUCT_OPS(cpu0_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
+{
+	return 0;
+}
+
+void BPF_STRUCT_OPS(cpu0_enqueue, struct task_struct *p, u64 enq_flags)
+{
+	/*
+	 * select_cpu() always picks CPU0. If @p is not on CPU0, it can't run on
+	 * CPU 0. Queue on whichever CPU it's currently on.
+	 */
+	if (scx_bpf_task_cpu(p) != 0) {
+		stat_inc(0);	/* count local queueing */
+		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
+		return;
+	}
+
+	stat_inc(1);	/* count cpu0 queueing */
+	scx_bpf_dsq_insert(p, DSQ_CPU0, SCX_SLICE_DFL, enq_flags);
+}
+
+void BPF_STRUCT_OPS(cpu0_dispatch, s32 cpu, struct task_struct *prev)
+{
+	if (cpu == 0)
+		scx_bpf_dsq_move_to_local(DSQ_CPU0);
+}
+
+s32 BPF_STRUCT_OPS_SLEEPABLE(cpu0_init)
+{
+	return scx_bpf_create_dsq(DSQ_CPU0, -1);
+}
+
+void BPF_STRUCT_OPS(cpu0_exit, struct scx_exit_info *ei)
+{
+	UEI_RECORD(uei, ei);
+}
+
+SCX_OPS_DEFINE(cpu0_ops,
+	       .select_cpu		= (void *)cpu0_select_cpu,
+	       .enqueue			= (void *)cpu0_enqueue,
+	       .dispatch		= (void *)cpu0_dispatch,
+	       .init			= (void *)cpu0_init,
+	       .exit			= (void *)cpu0_exit,
+	       .name			= "cpu0");
diff --git a/tools/sched_ext/scx_cpu0.c b/tools/sched_ext/scx_cpu0.c
new file mode 100644
index 000000000000..1e4fa4ab8da9
--- /dev/null
+++ b/tools/sched_ext/scx_cpu0.c
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2025 Tejun Heo <tj@kernel.org>
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+#include <assert.h>
+#include <libgen.h>
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include "scx_cpu0.bpf.skel.h"
+
+const char help_fmt[] =
+"A cpu0 sched_ext scheduler.\n"
+"\n"
+"See the top-level comment in .bpf.c for more details.\n"
+"\n"
+"Usage: %s [-v]\n"
+"\n"
+"  -v            Print libbpf debug messages\n"
+"  -h            Display this help and exit\n";
+
+static bool verbose;
+static volatile int exit_req;
+
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+	if (level == LIBBPF_DEBUG && !verbose)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+static void sigint_handler(int sig)
+{
+	exit_req = 1;
+}
+
+static void read_stats(struct scx_cpu0 *skel, __u64 *stats)
+{
+	int nr_cpus = libbpf_num_possible_cpus();
+	assert(nr_cpus > 0);
+	__u64 cnts[2][nr_cpus];
+	__u32 idx;
+
+	memset(stats, 0, sizeof(stats[0]) * 2);
+
+	for (idx = 0; idx < 2; idx++) {
+		int ret, cpu;
+
+		ret = bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats),
+					  &idx, cnts[idx]);
+		if (ret < 0)
+			continue;
+		for (cpu = 0; cpu < nr_cpus; cpu++)
+			stats[idx] += cnts[idx][cpu];
+	}
+}
+
+int main(int argc, char **argv)
+{
+	struct scx_cpu0 *skel;
+	struct bpf_link *link;
+	__u32 opt;
+	__u64 ecode;
+
+	libbpf_set_print(libbpf_print_fn);
+	signal(SIGINT, sigint_handler);
+	signal(SIGTERM, sigint_handler);
+restart:
+	skel = SCX_OPS_OPEN(cpu0_ops, scx_cpu0);
+
+	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+	while ((opt = getopt(argc, argv, "vh")) != -1) {
+		switch (opt) {
+		case 'v':
+			verbose = true;
+			break;
+		default:
+			fprintf(stderr, help_fmt, basename(argv[0]));
+			return opt != 'h';
+		}
+	}
+
+	SCX_OPS_LOAD(skel, cpu0_ops, scx_cpu0, uei);
+	link = SCX_OPS_ATTACH(skel, cpu0_ops, scx_cpu0);
+
+	while (!exit_req && !UEI_EXITED(skel, uei)) {
+		__u64 stats[2];
+
+		read_stats(skel, stats);
+		printf("local=%llu cpu0=%llu\n", stats[0], stats[1]);
+		fflush(stdout);
+		sleep(1);
+	}
+
+	bpf_link__destroy(link);
+	ecode = UEI_REPORT(skel, uei);
+	scx_cpu0__destroy(skel);
+
+	if (UEI_ECODE_RESTART(ecode))
+		goto restart;
+	return 0;
+}
diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c
index 2c720e3ecad5..43126858b8e4 100644
--- a/tools/sched_ext/scx_flatcg.bpf.c
+++ b/tools/sched_ext/scx_flatcg.bpf.c
@@ -382,7 +382,7 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags)
 		return;
 	}
 
-	cgrp = __COMPAT_scx_bpf_task_cgroup(p);
+	cgrp = scx_bpf_task_cgroup(p);
 	cgc = find_cgrp_ctx(cgrp);
 	if (!cgc)
 		goto out_release;
@@ -508,7 +508,7 @@ void BPF_STRUCT_OPS(fcg_runnable, struct task_struct *p, u64 enq_flags)
 {
 	struct cgroup *cgrp;
 
-	cgrp = __COMPAT_scx_bpf_task_cgroup(p);
+	cgrp = scx_bpf_task_cgroup(p);
 	update_active_weight_sums(cgrp, true);
 	bpf_cgroup_release(cgrp);
 }
@@ -521,7 +521,7 @@ void BPF_STRUCT_OPS(fcg_running, struct task_struct *p)
 	if (fifo_sched)
 		return;
 
-	cgrp = __COMPAT_scx_bpf_task_cgroup(p);
+	cgrp = scx_bpf_task_cgroup(p);
 	cgc = find_cgrp_ctx(cgrp);
 	if (cgc) {
 		/*
@@ -564,7 +564,7 @@ void BPF_STRUCT_OPS(fcg_stopping, struct task_struct *p, bool runnable)
 	if (!taskc->bypassed_at)
 		return;
 
-	cgrp = __COMPAT_scx_bpf_task_cgroup(p);
+	cgrp = scx_bpf_task_cgroup(p);
 	cgc = find_cgrp_ctx(cgrp);
 	if (cgc) {
 		__sync_fetch_and_add(&cgc->cvtime_delta,
@@ -578,7 +578,7 @@ void BPF_STRUCT_OPS(fcg_quiescent, struct task_struct *p, u64 deq_flags)
 {
 	struct cgroup *cgrp;
 
-	cgrp = __COMPAT_scx_bpf_task_cgroup(p);
+	cgrp = scx_bpf_task_cgroup(p);
 	update_active_weight_sums(cgrp, false);
 	bpf_cgroup_release(cgrp);
 }
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index 3072b593f898..df21fad0c438 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -202,6 +202,9 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
 	void *ring;
 	s32 cpu;
 
+	if (enq_flags & SCX_ENQ_REENQ)
+		__sync_fetch_and_add(&nr_reenqueued, 1);
+
 	if (p->flags & PF_KTHREAD) {
 		if (stall_kernel_nth && !(++kernel_cnt % stall_kernel_nth))
 			return;
@@ -320,12 +323,9 @@ static bool dispatch_highpri(bool from_timer)
 
 		if (tctx->highpri) {
 			/* exercise the set_*() and vtime interface too */
-			__COMPAT_scx_bpf_dsq_move_set_slice(
-				BPF_FOR_EACH_ITER, slice_ns * 2);
-			__COMPAT_scx_bpf_dsq_move_set_vtime(
-				BPF_FOR_EACH_ITER, highpri_seq++);
-			__COMPAT_scx_bpf_dsq_move_vtime(
-				BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0);
+			scx_bpf_dsq_move_set_slice(BPF_FOR_EACH_ITER, slice_ns * 2);
+			scx_bpf_dsq_move_set_vtime(BPF_FOR_EACH_ITER, highpri_seq++);
+			scx_bpf_dsq_move_vtime(BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0);
 		}
 	}
 
@@ -342,9 +342,8 @@ static bool dispatch_highpri(bool from_timer)
 		else
 			cpu = scx_bpf_pick_any_cpu(p->cpus_ptr, 0);
 
-		if (__COMPAT_scx_bpf_dsq_move(BPF_FOR_EACH_ITER, p,
-					      SCX_DSQ_LOCAL_ON | cpu,
-					      SCX_ENQ_PREEMPT)) {
+		if (scx_bpf_dsq_move(BPF_FOR_EACH_ITER, p, SCX_DSQ_LOCAL_ON | cpu,
+				     SCX_ENQ_PREEMPT)) {
 			if (cpu == this_cpu) {
 				dispatched = true;
 				__sync_fetch_and_add(&nr_expedited_local, 1);
@@ -533,20 +532,35 @@ bool BPF_STRUCT_OPS(qmap_core_sched_before,
 	return task_qdist(a) > task_qdist(b);
 }
 
-void BPF_STRUCT_OPS(qmap_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
+SEC("tp_btf/sched_switch")
+int BPF_PROG(qmap_sched_switch, bool preempt, struct task_struct *prev,
+	     struct task_struct *next, unsigned long prev_state)
 {
-	u32 cnt;
+	if (!__COMPAT_scx_bpf_reenqueue_local_from_anywhere())
+		return 0;
 
 	/*
-	 * Called when @cpu is taken by a higher priority scheduling class. This
-	 * makes @cpu no longer available for executing sched_ext tasks. As we
-	 * don't want the tasks in @cpu's local dsq to sit there until @cpu
-	 * becomes available again, re-enqueue them into the global dsq. See
-	 * %SCX_ENQ_REENQ handling in qmap_enqueue().
+	 * If @cpu is taken by a higher priority scheduling class, it is no
+	 * longer available for executing sched_ext tasks. As we don't want the
+	 * tasks in @cpu's local dsq to sit there until @cpu becomes available
+	 * again, re-enqueue them into the global dsq. See %SCX_ENQ_REENQ
+	 * handling in qmap_enqueue().
 	 */
-	cnt = scx_bpf_reenqueue_local();
-	if (cnt)
-		__sync_fetch_and_add(&nr_reenqueued, cnt);
+	switch (next->policy) {
+	case 1: /* SCHED_FIFO */
+	case 2: /* SCHED_RR */
+	case 6: /* SCHED_DEADLINE */
+		scx_bpf_reenqueue_local();
+	}
+
+	return 0;
+}
+
+void BPF_STRUCT_OPS(qmap_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
+{
+	/* see qmap_sched_switch() to learn how to do this on newer kernels */
+	if (!__COMPAT_scx_bpf_reenqueue_local_from_anywhere())
+		scx_bpf_reenqueue_local();
 }
 
 s32 BPF_STRUCT_OPS(qmap_init_task, struct task_struct *p,
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index a85c19e9524e..0114108ab25f 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -1071,7 +1071,7 @@ static bool sve_write_supported(struct test_config *config)
 
 static bool sve_write_fpsimd_supported(struct test_config *config)
 {
-	if (!sve_supported())
+	if (!sve_supported() && !sme_supported())
 		return false;
 
 	if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA))
@@ -1231,9 +1231,6 @@ static void sve_write_fpsimd(pid_t child, struct test_config *config)
 	vl = vl_expected(config);
 	vq = __sve_vq_from_vl(vl);
 
-	if (!vl)
-		return;
-
 	iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_FPSIMD);
 	iov.iov_base = malloc(iov.iov_len);
 	if (!iov.iov_base) {
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index e0fc3a001e28..f44d44618575 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -394,6 +394,58 @@ out:
 	free(svebuf);
 }
 
+/* Write the FPSIMD registers via the SVE regset when SVE is not supported */
+static void ptrace_sve_fpsimd_no_sve(pid_t child)
+{
+	void *svebuf;
+	struct user_sve_header *sve;
+	struct user_fpsimd_state *fpsimd, new_fpsimd;
+	unsigned int i, j;
+	unsigned char *p;
+	int ret;
+
+	svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+	if (!svebuf) {
+		ksft_test_result_fail("Failed to allocate FPSIMD buffer\n");
+		return;
+	}
+
+	/* On a system without SVE the VL should be set to 0 */
+	memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+	sve = svebuf;
+	sve->flags = SVE_PT_REGS_FPSIMD;
+	sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD);
+	sve->vl = 0;
+
+	/* Try to set a known FPSIMD state via PT_REGS_SVE */
+	fpsimd = (struct user_fpsimd_state *)((char *)sve +
+					      SVE_PT_FPSIMD_OFFSET);
+	for (i = 0; i < 32; ++i) {
+		p = (unsigned char *)&fpsimd->vregs[i];
+
+		for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j)
+			p[j] = j;
+	}
+
+	ret = set_sve(child, &vec_types[0], sve);
+	ksft_test_result(ret == 0, "FPSIMD write via SVE\n");
+	if (ret) {
+		ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+		goto out;
+	}
+
+	/* Verify via the FPSIMD regset */
+	if (get_fpsimd(child, &new_fpsimd)) {
+		ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+		goto out;
+	}
+	ksft_test_result(memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0,
+			 "Verify FPSIMD write via SVE\n");
+
+out:
+	free(svebuf);
+}
+
 /* Validate attempting to set SVE data and read SVE data */
 static void ptrace_set_sve_get_sve_data(pid_t child,
 					const struct vec_type *type,
@@ -826,6 +878,15 @@ static int do_parent(pid_t child)
 		}
 	}
 
+	/* We support SVE writes of FPSIMD format on SME only systems */
+	if (!(getauxval(AT_HWCAP) & HWCAP_SVE) &&
+	    (getauxval(AT_HWCAP2) & HWCAP2_SME)) {
+		ptrace_sve_fpsimd_no_sve(child);
+	} else {
+		ksft_test_result_skip("FPSIMD write via SVE\n");
+		ksft_test_result_skip("Verify FPSIMD write via SVE\n");
+	}
+
 	ret = EXIT_SUCCESS;
 
 error:
diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S
index 38080f3c3280..a8df05771670 100644
--- a/tools/testing/selftests/arm64/fp/zt-test.S
+++ b/tools/testing/selftests/arm64/fp/zt-test.S
@@ -276,7 +276,7 @@ function barf
 	bl	putdec
 	puts	", iteration="
 	mov	x0, x22
-	bl	putdec
+	bl	putdecn
 	puts	"\tExpected ["
 	mov	x0, x10
 	mov	x1, x12
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index be1ee7ba7ce0..19c1638e312a 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -23,6 +23,7 @@ test_tcpnotify_user
 test_libbpf
 xdping
 test_cpp
+test_progs_verification_cert
 *.d
 *.subskel.h
 *.skel.h
@@ -32,7 +33,6 @@ test_cpp
 /cpuv4
 /host-tools
 /tools
-/runqslower
 /bench
 /veristat
 /sign-file
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f00587d4ede6..b7030a6e2e76 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -46,6 +46,7 @@ endif
 
 CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11				\
 	  -Wall -Werror -fno-omit-frame-pointer				\
+	  -Wno-unused-but-set-variable					\
 	  $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS)			\
 	  -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)		\
 	  -I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT)
@@ -98,14 +99,11 @@ TEST_GEN_PROGS += test_progs-cpuv4
 TEST_INST_SUBDIRS += cpuv4
 endif
 
-TEST_GEN_FILES = test_tc_edt.bpf.o
 TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
 	test_lirc_mode2.sh \
-	test_tc_tunnel.sh \
-	test_tc_edt.sh \
 	test_xdping.sh \
 	test_bpftool_build.sh \
 	test_bpftool.sh \
@@ -127,7 +125,6 @@ TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS))
 TEST_GEN_PROGS_EXTENDED = \
 	bench \
 	flow_dissector_load \
-	runqslower \
 	test_cpp \
 	test_lirc_mode2_user \
 	veristat \
@@ -209,8 +206,6 @@ HOST_INCLUDE_DIR	:= $(INCLUDE_DIR)
 endif
 HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
 RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
-RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/
-
 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
 		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
 		     ../../../../vmlinux				\
@@ -232,7 +227,7 @@ $(notdir $(TEST_GEN_PROGS) $(TEST_KMODS)				\
 MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf	\
 	       $(BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/bpftool		\
 	       $(HOST_BUILD_DIR)/resolve_btfids				\
-	       $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR))
+	       $(INCLUDE_DIR))
 $(MAKE_DIRS):
 	$(call msg,MKDIR,,$@)
 	$(Q)mkdir -p $@
@@ -304,17 +299,6 @@ TRUNNER_BPFTOOL := $(DEFAULT_BPFTOOL)
 USE_BOOTSTRAP := "bootstrap/"
 endif
 
-$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
-	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower	       \
-		    OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF)     \
-		    BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/		       \
-		    BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/			       \
-		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR)		       \
-		    BPF_TARGET_ENDIAN=$(BPF_TARGET_ENDIAN)		       \
-		    EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \
-		    EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' &&	       \
-		    cp $(RUNQSLOWER_OUTPUT)runqslower $@
-
 TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL)
 
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
@@ -453,7 +437,9 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)	\
 	     -I$(abspath $(OUTPUT)/../usr/include)			\
 	     -std=gnu11		 					\
 	     -fno-strict-aliasing 					\
-	     -Wno-compare-distinct-pointer-types
+	     -Wno-compare-distinct-pointer-types			\
+	     -Wno-initializer-overrides					\
+	     #
 # TODO: enable me -Wsign-compare
 
 CLANG_CFLAGS = $(CLANG_SYS_INCLUDES)
@@ -498,7 +484,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
 
 LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c 	\
 	core_kern.c core_kern_overflow.c test_ringbuf.c			\
-	test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
+	test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c    \
+	test_ringbuf_overwrite.c
 
 LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c
 
@@ -543,6 +530,8 @@ TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o,	\
 				 $$(notdir $$(wildcard $(TRUNNER_TESTS_DIR)/*.c)))
 TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o,		\
 				 $$(filter %.c,$(TRUNNER_EXTRA_SOURCES)))
+TRUNNER_LIB_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o,		\
+				 $$(filter %.c,$(TRUNNER_LIB_SOURCES)))
 TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
 TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
 TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
@@ -686,6 +675,10 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 	$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
 	$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
 
+$(TRUNNER_LIB_OBJS): $(TRUNNER_OUTPUT)/%.o:$(TOOLSDIR)/lib/%.c
+	$$(call msg,LIB-OBJ,$(TRUNNER_BINARY),$$@)
+	$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+
 # non-flavored in-srctree builds receive special treatment, in particular, we
 # do not need to copy extra resources (see e.g. test_btf_dump_case())
 $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
@@ -699,6 +692,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
 
 $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
 			     $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ)		\
+			     $(TRUNNER_LIB_OBJS)			\
 			     $(RESOLVE_BTFIDS)				\
 			     $(TRUNNER_BPFTOOL)				\
 			     $(OUTPUT)/veristat				\
@@ -721,7 +715,8 @@ $(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP)
 	$(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR)
 
 $(VERIFY_SIG_HDR): $(VERIFICATION_CERT)
-	$(Q)xxd -i -n test_progs_verification_cert $< > $@
+	$(Q)ln -fs $< test_progs_verification_cert && \
+	xxd -i test_progs_verification_cert > $@
 
 # Define test_progs test runner.
 TRUNNER_TESTS_DIR := prog_tests
@@ -745,6 +740,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c		\
 			 $(VERIFY_SIG_HDR)		\
 			 flow_dissector_load.h	\
 			 ip_check_defrag_frags.h
+TRUNNER_LIB_SOURCES := find_bit.c
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read				\
 		       $(OUTPUT)/liburandom_read.so			\
 		       $(OUTPUT)/xdp_synproxy				\
@@ -782,6 +778,7 @@ endif
 TRUNNER_TESTS_DIR := map_tests
 TRUNNER_BPF_PROGS_DIR := progs
 TRUNNER_EXTRA_SOURCES := test_maps.c
+TRUNNER_LIB_SOURCES :=
 TRUNNER_EXTRA_FILES :=
 TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
 TRUNNER_BPF_CFLAGS :=
@@ -803,7 +800,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
 	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
 
 # Include find_bit.c to compile xskxceiver.
-EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c
+EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c prog_tests/test_xsk.c prog_tests/test_xsk.h
 $(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
@@ -893,7 +890,8 @@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR)			\
 	$(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h	\
 			       no_alu32 cpuv4 bpf_gcc			\
 			       liburandom_read.so)			\
-	$(OUTPUT)/FEATURE-DUMP.selftests
+	$(OUTPUT)/FEATURE-DUMP.selftests				\
+	test_progs_verification_cert
 
 .PHONY: docs docs-clean
 
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index e1ee979e6acc..01bdce692799 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -19,6 +19,8 @@ static struct {
 	int ringbuf_sz; /* per-ringbuf, in bytes */
 	bool ringbuf_use_output; /* use slower output API */
 	int perfbuf_sz; /* per-CPU size, in pages */
+	bool overwrite;
+	bool bench_producer;
 } args = {
 	.back2back = false,
 	.batch_cnt = 500,
@@ -27,6 +29,8 @@ static struct {
 	.ringbuf_sz = 512 * 1024,
 	.ringbuf_use_output = false,
 	.perfbuf_sz = 128,
+	.overwrite = false,
+	.bench_producer = false,
 };
 
 enum {
@@ -35,6 +39,8 @@ enum {
 	ARG_RB_BATCH_CNT = 2002,
 	ARG_RB_SAMPLED = 2003,
 	ARG_RB_SAMPLE_RATE = 2004,
+	ARG_RB_OVERWRITE = 2005,
+	ARG_RB_BENCH_PRODUCER = 2006,
 };
 
 static const struct argp_option opts[] = {
@@ -43,6 +49,8 @@ static const struct argp_option opts[] = {
 	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
 	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
 	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+	{ "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
+	{ "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"},
 	{},
 };
 
@@ -72,6 +80,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			argp_usage(state);
 		}
 		break;
+	case ARG_RB_OVERWRITE:
+		args.overwrite = true;
+		break;
+	case ARG_RB_BENCH_PRODUCER:
+		args.bench_producer = true;
+		break;
 	default:
 		return ARGP_ERR_UNKNOWN;
 	}
@@ -95,8 +109,33 @@ static inline void bufs_trigger_batch(void)
 
 static void bufs_validate(void)
 {
-	if (env.consumer_cnt != 1) {
-		fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n");
+	if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) {
+		fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n");
+		exit(1);
+	}
+
+	if (args.overwrite && !args.bench_producer) {
+		fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n");
+		exit(1);
+	}
+
+	if (args.bench_producer && env.consumer_cnt != 0) {
+		fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n");
+		exit(1);
+	}
+
+	if (args.bench_producer && args.back2back) {
+		fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n");
+		exit(1);
+	}
+
+	if (args.bench_producer && args.sampled) {
+		fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n");
+		exit(1);
+	}
+
+	if (!args.bench_producer && env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n");
 		exit(1);
 	}
 
@@ -128,12 +167,17 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
 {
 	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
 
-	res->hits = atomic_swap(&buf_hits.value, 0);
+	if (args.bench_producer)
+		res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
+	else
+		res->hits = atomic_swap(&buf_hits.value, 0);
 	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
 }
 
 static struct ringbuf_bench *ringbuf_setup_skeleton(void)
 {
+	__u32 flags;
+	struct bpf_map *ringbuf;
 	struct ringbuf_bench *skel;
 
 	setup_libbpf();
@@ -146,12 +190,19 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
 
 	skel->rodata->batch_cnt = args.batch_cnt;
 	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+	skel->rodata->bench_producer = args.bench_producer;
 
 	if (args.sampled)
 		/* record data + header take 16 bytes */
 		skel->rodata->wakeup_data_size = args.sample_rate * 16;
 
-	bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
+	ringbuf = skel->maps.ringbuf;
+	if (args.overwrite) {
+		flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE;
+		bpf_map__set_map_flags(ringbuf, flags);
+	}
+
+	bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);
 
 	if (ringbuf_bench__load(skel)) {
 		fprintf(stderr, "failed to load skeleton\n");
@@ -171,10 +222,12 @@ static void ringbuf_libbpf_setup(void)
 {
 	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
 	struct bpf_link *link;
+	int map_fd;
 
 	ctx->skel = ringbuf_setup_skeleton();
-	ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
-					buf_process_sample, NULL, NULL);
+
+	map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+	ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL);
 	if (!ctx->ringbuf) {
 		fprintf(stderr, "failed to create ringbuf\n");
 		exit(1);
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 1e2aff007c2a..34018fc3927f 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -180,10 +180,10 @@ static void trigger_kernel_count_setup(void)
 {
 	setup_ctx();
 	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
-	bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
+	bpf_program__set_autoload(ctx.skel->progs.trigger_kernel_count, true);
 	load_ctx();
 	/* override driver program */
-	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
+	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_kernel_count);
 }
 
 static void trigger_kprobe_setup(void)
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index 91e3567962ff..83e05e837871 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
 	summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
 done
 
+header "Ringbuf, multi-producer contention in overwrite mode, no consumer"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+	summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)"
+done
diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h
index 85dbc3ea4da5..e16fa7d95fcf 100644
--- a/tools/testing/selftests/bpf/bpf_arena_list.h
+++ b/tools/testing/selftests/bpf/bpf_arena_list.h
@@ -64,14 +64,12 @@ static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
 
 static inline void __list_del(arena_list_node_t *n)
 {
-	arena_list_node_t *next = n->next, *tmp;
+	arena_list_node_t *next = n->next;
 	arena_list_node_t * __arena *pprev = n->pprev;
 
 	cast_user(next);
 	cast_kern(pprev);
-	tmp = *pprev;
-	cast_kern(tmp);
-	WRITE_ONCE(tmp, next);
+	WRITE_ONCE(*pprev, next);
 	if (next) {
 		cast_user(pprev);
 		cast_kern(next);
diff --git a/tools/testing/selftests/bpf/bpf_arena_strsearch.h b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
new file mode 100644
index 000000000000..c1b6eaa905bb
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+__noinline int bpf_arena_strlen(const char __arena *s __arg_arena)
+{
+	const char __arena *sc;
+
+	for (sc = s; *sc != '\0'; ++sc)
+		cond_break;
+	return sc - s;
+}
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match.  The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails.  Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is a small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns.  Thus, it
+ * does not preprocess the patterns.  It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+__noinline bool glob_match(char const __arena *pat __arg_arena, char const __arena *str __arg_arena)
+{
+	/*
+	 * Backtrack to previous * on mismatch and retry starting one
+	 * character later in the string.  Because * matches all characters
+	 * (no exception for /), it can be easily proved that there's
+	 * never a need to backtrack multiple levels.
+	 */
+	char const __arena *back_pat = NULL, *back_str;
+
+	/*
+	 * Loop over each token (character or class) in pat, matching
+	 * it against the remaining unmatched tail of str.  Return false
+	 * on mismatch, or true after matching the trailing nul bytes.
+	 */
+	for (;;) {
+		unsigned char c = *str++;
+		unsigned char d = *pat++;
+
+		switch (d) {
+		case '?':	/* Wildcard: anything but nul */
+			if (c == '\0')
+				return false;
+			break;
+		case '*':	/* Any-length wildcard */
+			if (*pat == '\0')	/* Optimize trailing * case */
+				return true;
+			back_pat = pat;
+			back_str = --str;	/* Allow zero-length match */
+			break;
+		case '[': {	/* Character class */
+			bool match = false, inverted = (*pat == '!');
+			char const __arena *class = pat + inverted;
+			unsigned char a = *class++;
+
+			/*
+			 * Iterate over each span in the character class.
+			 * A span is either a single character a, or a
+			 * range a-b.  The first span may begin with ']'.
+			 */
+			do {
+				unsigned char b = a;
+
+				if (a == '\0')	/* Malformed */
+					goto literal;
+
+				if (class[0] == '-' && class[1] != ']') {
+					b = class[1];
+
+					if (b == '\0')
+						goto literal;
+
+					class += 2;
+					/* Any special action if a > b? */
+				}
+				match |= (a <= c && c <= b);
+				cond_break;
+			} while ((a = *class++) != ']');
+
+			if (match == inverted)
+				goto backtrack;
+			pat = class;
+			}
+			break;
+		case '\\':
+			d = *pat++;
+			__attribute__((__fallthrough__));
+		default:	/* Literal character */
+literal:
+			if (c == d) {
+				if (d == '\0')
+					return true;
+				break;
+			}
+backtrack:
+			if (c == '\0' || !back_pat)
+				return false;	/* No point continuing */
+			/* Try again from last *, one character later in str. */
+			pat = back_pat;
+			str = ++back_str;
+			break;
+		}
+		cond_break;
+	}
+	return false;
+}
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 794d44d19c88..e0189254bb6e 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -28,8 +28,8 @@ extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
  *  Either a direct pointer to the dynptr data or a pointer to the user-provided
  *  buffer if unable to obtain a direct pointer
  */
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
-			      void *buffer, __u32 buffer__szk) __ksym __weak;
+extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset,
+			      void *buffer, __u64 buffer__szk) __ksym __weak;
 
 /* Description
  *  Obtain a read-write pointer to the dynptr's data
@@ -37,13 +37,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
  *  Either a direct pointer to the dynptr data or a pointer to the user-provided
  *  buffer if unable to obtain a direct pointer
  */
-extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
-			      void *buffer, __u32 buffer__szk) __ksym __weak;
+extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, void *buffer,
+				   __u64 buffer__szk) __ksym __weak;
 
-extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak;
+extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak;
 extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak;
 extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak;
-extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
+extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
 extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak;
 
 /* Description
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 70b28c1e653e..558839e3c185 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -50,6 +50,7 @@ CONFIG_IPV6_SIT=y
 CONFIG_IPV6_TUNNEL=y
 CONFIG_KEYS=y
 CONFIG_LIRC=y
+CONFIG_LIVEPATCH=y
 CONFIG_LWTUNNEL=y
 CONFIG_MODULE_SIG=y
 CONFIG_MODULE_SRCVERSION_ALL=y
@@ -111,6 +112,8 @@ CONFIG_IP6_NF_FILTER=y
 CONFIG_NF_NAT=y
 CONFIG_PACKET=y
 CONFIG_RC_CORE=y
+CONFIG_SAMPLES=y
+CONFIG_SAMPLE_LIVEPATCH=m
 CONFIG_SECURITY=y
 CONFIG_SECURITYFS=y
 CONFIG_SYN_COOKIES=y
@@ -123,3 +126,8 @@ CONFIG_XDP_SOCKETS=y
 CONFIG_XFRM_INTERFACE=y
 CONFIG_TCP_CONG_DCTCP=y
 CONFIG_TCP_CONG_BBR=y
+CONFIG_INFINIBAND=y
+CONFIG_SMC=y
+CONFIG_SMC_HS_CTRL_BPF=y
+CONFIG_DIBS=y
+CONFIG_DIBS_LO=y
+\ No newline at end of file
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index cdf7b6641444..0a6a5561bed3 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -97,7 +97,7 @@ int settimeo(int fd, int timeout_ms)
 int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
 		      const struct network_helper_opts *opts)
 {
-	int fd;
+	int on = 1, fd;
 
 	if (!opts)
 		opts = &default_opts;
@@ -111,6 +111,12 @@ int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t a
 	if (settimeo(fd, opts->timeout_ms))
 		goto error_close;
 
+	if (type == SOCK_STREAM &&
+	    setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) {
+		log_err("Failed to enable SO_REUSEADDR");
+		goto error_close;
+	}
+
 	if (opts->post_socket_cb &&
 	    opts->post_socket_cb(fd, opts->cb_opts)) {
 		log_err("Failed to call post_socket_cb");
@@ -766,6 +772,50 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes)
 	return err;
 }
 
+/* Attach TC program(s) to @dev; the contract is documented in network_helpers.h */
+int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd)
+{
+	int ifindex, ret;
+
+	if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0,
+			 "at least one program fd is valid"))
+		return -1;
+
+	ifindex = if_nametoindex(dev);
+	if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+		return -1;
+
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
+			    .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
+			    .priority = 1, .prog_fd = ingress_fd);
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
+			    .priority = 1, .prog_fd = egress_fd);
+
+	ret = bpf_tc_hook_create(&hook);
+	if (!ASSERT_OK(ret, "create tc hook"))
+		return ret;
+
+	if (ingress_fd >= 0) {
+		hook.attach_point = BPF_TC_INGRESS;
+		ret = bpf_tc_attach(&hook, &opts1);
+		if (!ASSERT_OK(ret, "bpf_tc_attach"))
+			goto destroy_hook;
+	}
+	if (egress_fd >= 0) {
+		hook.attach_point = BPF_TC_EGRESS;
+		ret = bpf_tc_attach(&hook, &opts2);
+		if (!ASSERT_OK(ret, "bpf_tc_attach"))
+			goto destroy_hook;
+	}
+	return 0;
+destroy_hook:
+	/* Destroying with both attach points set also removes the clsact qdisc */
+	hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+	bpf_tc_hook_destroy(&hook);
+	return ret;
+}
+
 #ifdef TRAFFIC_MONITOR
 struct tmonitor_ctx {
 	pcap_t *pcap;
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index ef208eefd571..79a010c88e11 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -255,6 +255,22 @@ struct tmonitor_ctx;
 
 typedef int (*tm_print_fn_t)(const char *format, va_list args);
 
+/**
+ * tc_prog_attach - attach BPF program(s) to an interface
+ *
+ * Takes file descriptors pointing to at least one, at most two BPF
+ * programs, and attach those programs to an interface ingress, egress or
+ * both.
+ *
+ * @dev: string containing the interface name
+ * @ingress_fd: file descriptor of the program to attach to interface ingress
+ * @egress_fd: file descriptor of the program to attach to interface egress
+ *
+ * Returns 0 on success, -1 if no valid file descriptor has been found or the
+ * interface name is invalid, or the libbpf error if hook create/attach fails.
+ */
+int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd);
+
 #ifdef TRAFFIC_MONITOR
 struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
 					   const char *subtest_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c
new file mode 100644
index 000000000000..f81a0c066505
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "arena_strsearch.skel.h"
+
+/* Run the arena_strsearch program once; skip when the skeleton's skip flag is set */
+static void test_arena_str(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+	struct arena_strsearch *skel = arena_strsearch__open_and_load();
+	int err;
+
+	if (!ASSERT_OK_PTR(skel, "arena_strsearch__open_and_load"))
+		return;
+
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_strsearch), &topts);
+	ASSERT_OK(err, "ret_add");
+	ASSERT_OK(topts.retval, "retval");
+	if (skel->bss->skip) {
+		printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+		test__skip();
+	}
+	arena_strsearch__destroy(skel);
+}
+
+void test_arena_strsearch(void)
+{
+	if (!test__start_subtest("arena_strsearch"))
+		return;	/* subtest not selected */
+	test_arena_str();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
index bb143de68875..e27d66b75fb1 100644
--- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
@@ -144,11 +144,17 @@ static void test_parse_test_list_file(void)
 	if (!ASSERT_OK(ferror(fp), "prepare tmp"))
 		goto out_fclose;
 
+	if (!ASSERT_OK(fsync(fileno(fp)), "fsync tmp"))
+		goto out_fclose;
+
 	init_test_filter_set(&set);
 
-	ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file");
+	if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"))
+		goto out_fclose;
+
+	if (!ASSERT_EQ(set.cnt, 4, "test  count"))
+		goto out_free_set;
 
-	ASSERT_EQ(set.cnt, 4, "test  count");
 	ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
 	ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count");
 	ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name");
@@ -158,8 +164,8 @@ static void test_parse_test_list_file(void)
 	ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name");
 	ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name");
 
+out_free_set:
 	free_test_filter_set(&set);
-
 out_fclose:
 	fclose(fp);
 out_remove:
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
new file mode 100644
index 000000000000..d138cc7b1bda
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+
+#include <sys/syscall.h>
+#include <bpf/bpf.h>
+
+#include "bpf_gotox.skel.h"
+
+/* Execute @prog once through bpf_prog_test_run_opts() with @ctx_in as context */
+static void __test_run(struct bpf_program *prog, void *ctx_in, size_t ctx_size_in)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, run_opts);
+	int fd = bpf_program__fd(prog);
+	int rc;
+
+	run_opts.ctx_in = ctx_in;
+	run_opts.ctx_size_in = ctx_size_in;
+	rc = bpf_prog_test_run_opts(fd, &run_opts);
+	ASSERT_OK(rc, "test_run_opts err");
+}
+
+static void __subtest(struct bpf_gotox *skel, void (*check)(struct bpf_gotox *))
+{
+	if (!skel->data->skip)
+		check(skel);	/* skip flag clear: run the check */
+	else
+		test__skip();
+}
+
+/*
+ * Run @prog with @ctx_in as its context and check that the skeleton's
+ * ret_user variable equals @expected afterwards.
+ */
+static void check_simple(struct bpf_gotox *skel, struct bpf_program *prog,
+			 __u64 ctx_in, __u64 expected)
+{
+	skel->bss->ret_user = 0;	/* clear the previous run's result */
+	__test_run(prog, &ctx_in, sizeof(ctx_in));
+
+	ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user");
+}
+
+/*
+ * Publish @ctx_in through in_user, call usleep() as the trigger, then check
+ * that ret_user equals @expected.  @prog itself is not used here.
+ */
+static void check_simple_fentry(struct bpf_gotox *skel, struct bpf_program *prog,
+				__u64 ctx_in, __u64 expected)
+{
+	skel->bss->in_user = ctx_in;
+	skel->bss->ret_user = 0;
+
+	usleep(1);	/* trigger */
+
+	ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user");
+}
+
+/* Validate that libbpf creates a single map when one jump table is loaded twice */
+static void check_one_map_two_jumps(struct bpf_gotox *skel)
+{
+	struct bpf_prog_info prog_info;
+	struct bpf_map_info map_info;
+	__u32 map_ids[16], len, i;
+	int prog_fd, map_fd, ret;
+	bool seen = false;
+
+	memset(&prog_info, 0, sizeof(prog_info));
+	prog_info.map_ids = (__u64)(unsigned long)map_ids;
+	prog_info.nr_map_ids = ARRAY_SIZE(map_ids);
+	prog_fd = bpf_program__fd(skel->progs.one_map_two_jumps);
+	if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd(one_map_two_jumps)"))
+		return;
+
+	len = sizeof(prog_info);
+	ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &len);
+	if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(prog_fd)"))
+		return;
+
+	/* the kernel reports the total map count, which may exceed map_ids[] */
+	if (!ASSERT_LE(prog_info.nr_map_ids, ARRAY_SIZE(map_ids), "nr_map_ids"))
+		return;
+
+	for (i = 0; i < prog_info.nr_map_ids; i++) {
+		map_fd = bpf_map_get_fd_by_id(map_ids[i]);
+		if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id"))
+			return;
+
+		len = sizeof(map_info);
+		memset(&map_info, 0, len);
+		ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &len);
+		if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(map_fd)")) {
+			close(map_fd);
+			return;
+		}
+		if (map_info.type == BPF_MAP_TYPE_INSN_ARRAY) {
+			if (!ASSERT_EQ(seen, false, "more than one INSN_ARRAY map")) {
+				close(map_fd);
+				return;
+			}
+			seen = true;
+		}
+		close(map_fd);
+	}
+
+	ASSERT_EQ(seen, true, "no INSN_ARRAY map");
+}
+
+/* Run one_switch on every input; ret_user must match the paired want[] value */
+static void check_one_switch(struct bpf_gotox *skel)
+{
+	const __u64 args[] = {0, 1, 2, 3, 4,  5, 77};
+	const __u64 want[] = {2, 3, 4, 5, 7, 19, 19};
+
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple(skel, skel->progs.one_switch, args[n], want[n]);
+}
+
+/* Run one_switch_non_zero_sec_off on every input; ret_user must match want[] */
+static void check_one_switch_non_zero_sec_off(struct bpf_gotox *skel)
+{
+	const __u64 args[] = {0, 1, 2, 3, 4,  5, 77};
+	const __u64 want[] = {2, 3, 4, 5, 7, 19, 19};
+
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple(skel, skel->progs.one_switch_non_zero_sec_off, args[n], want[n]);
+}
+
+/* Run two_switches on every input; ret_user must match the paired want[] value */
+static void check_two_switches(struct bpf_gotox *skel)
+{
+	const __u64 args[] = {0, 1, 2, 3, 4, 5, 77};
+	const __u64 want[] = {103, 104, 107, 205, 115, 1019, 1019};
+
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple(skel, skel->progs.two_switches, args[n], want[n]);
+}
+
+/* Run big_jump_table on every input; ret_user must match the paired want[] value */
+static void check_big_jump_table(struct bpf_gotox *skel)
+{
+	const __u64 args[] = {0, 11, 27, 31, 22, 45, 99};
+	const __u64 want[] = {2,  3,  4,  5, 19, 19, 19};
+
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple(skel, skel->progs.big_jump_table, args[n], want[n]);
+}
+
+/* Run one_jump_two_maps on every input; ret_user must match the paired want[] value */
+static void check_one_jump_two_maps(struct bpf_gotox *skel)
+{
+	const __u64 args[] = {0, 1, 2, 3, 4, 5, 77};
+	const __u64 want[] = {12, 15, 7, 15, 12, 15, 15};
+
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple(skel, skel->progs.one_jump_two_maps, args[n], want[n]);
+}
+
+/* Check all pairs on use_static_global1 first, then on use_static_global2 */
+static void check_static_global(struct bpf_gotox *skel)
+{
+	const __u64 args[] = {0, 1, 2, 3, 4,  5, 77};
+	const __u64 want[] = {2, 3, 4, 5, 7, 19, 19};
+
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple(skel, skel->progs.use_static_global1, args[n], want[n]);
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple(skel, skel->progs.use_static_global2, args[n], want[n]);
+}
+
+/* Check all pairs on use_nonstatic_global1 first, then on use_nonstatic_global2 */
+static void check_nonstatic_global(struct bpf_gotox *skel)
+{
+	const __u64 args[] = {0, 1, 2, 3, 4,  5, 77};
+	const __u64 want[] = {2, 3, 4, 5, 7, 19, 19};
+
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple(skel, skel->progs.use_nonstatic_global1, args[n], want[n]);
+
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple(skel, skel->progs.use_nonstatic_global2, args[n], want[n]);
+}
+
+/* Attach simple_test_other_sec, then check every input/output pair on it */
+static void check_other_sec(struct bpf_gotox *skel)
+{
+	struct bpf_program *prog = skel->progs.simple_test_other_sec;
+	const __u64 args[] = {0, 1, 2, 3, 4,  5, 77};
+	const __u64 want[] = {2, 3, 4, 5, 7, 19, 19};
+	struct bpf_link *link;
+
+	link = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(link, "link"))
+		return;
+
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple_fentry(skel, prog, args[n], want[n]);
+	bpf_link__destroy(link);
+}
+
+/* Attach use_static_global_other_sec, then check every input/output pair on it */
+static void check_static_global_other_sec(struct bpf_gotox *skel)
+{
+	struct bpf_program *prog = skel->progs.use_static_global_other_sec;
+	const __u64 args[] = {0, 1, 2, 3, 4,  5, 77};
+	const __u64 want[] = {2, 3, 4, 5, 7, 19, 19};
+	struct bpf_link *link;
+
+	link = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(link, "link"))
+		return;
+
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple_fentry(skel, prog, args[n], want[n]);
+	bpf_link__destroy(link);
+}
+
+/* Attach use_nonstatic_global_other_sec, then check every input/output pair on it */
+static void check_nonstatic_global_other_sec(struct bpf_gotox *skel)
+{
+	struct bpf_program *prog = skel->progs.use_nonstatic_global_other_sec;
+	const __u64 args[] = {0, 1, 2, 3, 4,  5, 77};
+	const __u64 want[] = {2, 3, 4, 5, 7, 19, 19};
+	struct bpf_link *link;
+
+	link = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(link, "link"))
+		return;
+
+	for (size_t n = 0; n < ARRAY_SIZE(args); n++)
+		check_simple_fentry(skel, prog, args[n], want[n]);
+	bpf_link__destroy(link);
+}
+
+/* Load the bpf_gotox skeleton once and run every jump-table subtest on it */
+void test_bpf_gotox(void)
+{
+	struct bpf_gotox *skel;
+
+	skel = bpf_gotox__open();
+	if (!ASSERT_NEQ(skel, NULL, "bpf_gotox__open"))
+		return;
+
+	/* the skeleton is already allocated here, so a failed load must free it */
+	if (!ASSERT_OK(bpf_gotox__load(skel), "bpf_gotox__load"))
+		goto cleanup;
+
+	skel->bss->pid = getpid();
+
+	if (test__start_subtest("one-switch"))
+		__subtest(skel, check_one_switch);
+
+	if (test__start_subtest("one-switch-non-zero-sec-offset"))
+		__subtest(skel, check_one_switch_non_zero_sec_off);
+
+	if (test__start_subtest("two-switches"))
+		__subtest(skel, check_two_switches);
+
+	if (test__start_subtest("big-jump-table"))
+		__subtest(skel, check_big_jump_table);
+
+	if (test__start_subtest("static-global"))
+		__subtest(skel, check_static_global);
+
+	if (test__start_subtest("nonstatic-global"))
+		__subtest(skel, check_nonstatic_global);
+
+	if (test__start_subtest("other-sec"))
+		__subtest(skel, check_other_sec);
+
+	if (test__start_subtest("static-global-other-sec"))
+		__subtest(skel, check_static_global_other_sec);
+
+	if (test__start_subtest("nonstatic-global-other-sec"))
+		__subtest(skel, check_nonstatic_global_other_sec);
+
+	if (test__start_subtest("one-jump-two-maps"))
+		__subtest(skel, check_one_jump_two_maps);
+
+	if (test__start_subtest("one-map-two-jumps"))
+		__subtest(skel, check_one_map_two_jumps);
+cleanup:
+	bpf_gotox__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
new file mode 100644
index 000000000000..269870bec941
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <bpf/bpf.h>
+#include <test_progs.h>
+
+#ifdef __x86_64__
+/* Create a map named "insn_array" with 4-byte keys and insn-array-sized values */
+static int map_create(__u32 map_type, __u32 max_entries)
+{
+	return bpf_map_create(map_type, "insn_array",
+			      4 /* key size */,
+			      sizeof(struct bpf_insn_array_value),
+			      max_entries, NULL);
+}
+
+/* Load @insns as a "GPL" XDP program, handing @fd_array over in the load opts */
+static int prog_load(struct bpf_insn *insns, __u32 insn_cnt, int *fd_array, __u32 fd_array_cnt)
+{
+	LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
+		    .fd_array = fd_array,
+		    .fd_array_cnt = fd_array_cnt);
+
+	return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, &load_opts);
+}
+
+/*
+ * Fill an insn array so that entry i points at instruction map_in[i], load
+ * @insns with that map and check that entry i reads back as map_out[i]
+ */
+static void __check_success(struct bpf_insn *insns, __u32 insn_cnt, __u32 *map_in, __u32 *map_out)
+{
+	struct bpf_insn_array_value ent = {};
+	int prog_fd = -1, map_fd, idx;
+	char msg[64];
+
+	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, insn_cnt);
+	if (!ASSERT_GE(map_fd, 0, "map_create"))
+		return;
+
+	for (idx = 0; idx < insn_cnt; idx++) {
+		ent.orig_off = map_in[idx];
+		if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &idx, &ent, 0), 0, "bpf_map_update_elem"))
+			goto cleanup;
+	}
+	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+		goto cleanup;
+
+	prog_fd = prog_load(insns, insn_cnt, &map_fd, 1);
+	if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+		goto cleanup;
+
+	for (idx = 0; idx < insn_cnt; idx++) {
+		if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &idx, &ent), 0, "bpf_map_lookup_elem"))
+			goto cleanup;
+		snprintf(msg, sizeof(msg), "val.xlated_off should be equal map_out[%d]", idx);
+		ASSERT_EQ(ent.xlated_off, map_out[idx], msg);
+	}
+cleanup:
+	close(prog_fd);
+	close(map_fd);
+}
+
+/*
+ * Load a program that the verifier is expected to leave unmodified, with an
+ * insn_array entry for every instruction, and check that every offset is
+ * still the same after the load.
+ */
+static void check_one_to_one_mapping(void)
+{
+	struct bpf_insn prog[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 4),
+		BPF_MOV64_IMM(BPF_REG_0, 3),
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	__u32 orig_offs[] = {0, 1, 2, 3, 4, 5};
+	__u32 xlated_offs[] = {0, 1, 2, 3, 4, 5};
+
+	__check_success(prog, ARRAY_SIZE(prog), orig_offs, xlated_offs);
+}
+
+/*
+ * Load a program containing two calls to the jiffies64 helper, with an
+ * insn_array entry for every instruction, and check the offsets reported
+ * after the load: each call is expected to take up three instructions.
+ */
+static void check_simple(void)
+{
+	struct bpf_insn prog[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	__u32 orig_offs[] = {0, 1, 2, 3, 4, 5};
+	__u32 xlated_offs[] = {0, 1, 4, 5, 8, 9};
+
+	__check_success(prog, ARRAY_SIZE(prog), orig_offs, xlated_offs);
+}
+
+/*
+ * The verifier can delete code in two cases: nops and dead code.  Both look
+ * the same from the insn array's point of view, so only the simpler one is
+ * exercised here: a program with nops.  Deleted entries must read back as -1.
+ */
+static void check_deletions(void)
+{
+	struct bpf_insn prog[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	__u32 orig_offs[] = {0, 1, 2, 3, 4, 5};
+	__u32 xlated_offs[] = {0, -1, 1, -1, 2, 3};
+
+	__check_success(prog, ARRAY_SIZE(prog), orig_offs, xlated_offs);
+}
+
+/*
+ * Like check_deletions(), plus code that adds instructions and a second function
+ */
+static void check_deletions_with_functions(void)
+{
+	struct bpf_insn prog[] = {
+		BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+		BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+		BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+		BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		BPF_EXIT_INSN(),
+	};
+	__u32 orig_offs[] =   { 0, 1,  2, 3, 4, 5, /* func */  6, 7,  8, 9, 10};
+	__u32 xlated_offs[] = {-1, 0, -1, 3, 4, 5, /* func */ -1, 6, -1, 9, 10};
+
+	__check_success(prog, ARRAY_SIZE(prog), orig_offs, xlated_offs);
+}
+
+/*
+ * An insn array entry pointing past the last instruction of the program
+ * must make the program load fail with -EINVAL
+ */
+static void check_out_of_bounds_index(void)
+{
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 4),
+		BPF_MOV64_IMM(BPF_REG_0, 3),
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	struct bpf_insn_array_value val = {
+		.orig_off = ARRAY_SIZE(insns), /* too big */
+	};
+	int prog_fd, map_fd;
+	int key = 0;
+
+	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+	if (!ASSERT_GE(map_fd, 0, "map_create"))
+		return;
+
+	if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+		goto cleanup;
+
+	/* map setup went fine; the load itself is what is expected to fail */
+	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+	if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+		close(prog_fd);
+
+cleanup:
+	close(map_fd);
+}
+
+/*
+ * An insn array entry pointing into the middle of a 16-byte instruction
+ * must make the program load fail with -EINVAL
+ */
+static void check_mid_insn_index(void)
+{
+	struct bpf_insn insns[] = {
+		BPF_LD_IMM64(BPF_REG_0, 0), /* 2 x 8 */
+		BPF_EXIT_INSN(),
+	};
+	struct bpf_insn_array_value val = {
+		.orig_off = 1, /* middle of 16-byte instruction */
+	};
+	int prog_fd, map_fd;
+	int key = 0;
+
+	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+	if (!ASSERT_GE(map_fd, 0, "map_create"))
+		return;
+
+	if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+		goto cleanup;
+
+	/* map setup went fine; the load itself is what is expected to fail */
+	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+	if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+		close(prog_fd);
+
+cleanup:
+	close(map_fd);
+}
+
+static void check_incorrect_index(void)
+{
+	check_out_of_bounds_index();	/* orig_off past the end of the program */
+	check_mid_insn_index();		/* orig_off inside a two-slot instruction */
+}
+
+/*
+ * Write the single character *@level to the bpf_jit_harden sysctl and, on
+ * success, store the previously read setting in *@level.  Returns 0 or -1.
+ */
+static int set_bpf_jit_harden(char *level)
+{
+	int fd, n, ret = -1;
+	char old_level;
+
+	fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR | O_NONBLOCK);
+	if (fd < 0) {
+		ASSERT_FAIL("open .../bpf_jit_harden returned %d (errno=%d)", fd, errno);
+		return -1;
+	}
+
+	n = read(fd, &old_level, 1);
+	if (n != 1) {
+		ASSERT_FAIL("read from .../bpf_jit_harden returned %d (errno=%d)", n, errno);
+		goto end;
+	}
+
+	lseek(fd, 0, SEEK_SET);	/* rewind so that the write starts at offset 0 */
+
+	n = write(fd, level, 1);
+	if (n != 1) {
+		ASSERT_FAIL("write to .../bpf_jit_harden returned %d (errno=%d)", n, errno);
+		goto end;
+	}
+
+	*level = old_level;
+	ret = 0;
+end:
+	close(fd);
+	return ret;
+}
+
+/*
+ * Load with bpf_jit_harden set to '2' and check that entry i, which pointed
+ * at instruction i, is reported at offset 3 * i afterwards
+ */
+static void check_blindness(void)
+{
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 4),
+		BPF_MOV64_IMM(BPF_REG_0, 3),
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+	struct bpf_insn_array_value ent = {};
+	char harden = '@'; /* non-existing value: nothing to restore */
+	int prog_fd = -1, map_fd, i;
+	char msg[32];
+
+	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+	if (!ASSERT_GE(map_fd, 0, "map_create"))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(insns); i++) {
+		ent.orig_off = i;
+		if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &ent, 0), 0, "bpf_map_update_elem"))
+			goto cleanup;
+	}
+	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+		goto cleanup;
+
+	harden = '2';
+	if (set_bpf_jit_harden(&harden)) {
+		harden = '@'; /* open, read or write failed => no write was done */
+		goto cleanup;
+	}
+
+	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+	if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(insns); i++) {
+		if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &ent), 0, "bpf_map_lookup_elem"))
+			goto cleanup;
+		snprintf(msg, sizeof(msg), "val should be equal 3*%d", i);
+		ASSERT_EQ(ent.xlated_off, i * 3, msg);
+	}
+
+cleanup:
+	/* on success set_bpf_jit_harden() left the previous setting in harden */
+	if (harden != '@')
+		set_bpf_jit_harden(&harden);
+
+	close(prog_fd);
+	close(map_fd);
+}
+
+/*
+ * A program may only use an insn array that has been frozen: loading with
+ * an unfrozen map must fail with -EINVAL and succeed once it is frozen
+ */
+static void check_load_unfrozen_map(void)
+{
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	struct bpf_insn_array_value ent = {};
+	int prog_fd = -1, map_fd, i;
+
+	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+	if (!ASSERT_GE(map_fd, 0, "map_create"))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(insns); i++) {
+		ent.orig_off = i;
+		if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &ent, 0), 0, "bpf_map_update_elem"))
+			goto cleanup;
+	}
+
+	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+	if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+		goto cleanup;
+
+	/* correctness: now freeze the map, the program should load fine */
+	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+		goto cleanup;
+
+	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+	if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(insns); i++) {
+		if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &ent), 0, "bpf_map_lookup_elem"))
+			goto cleanup;
+		ASSERT_EQ(ent.xlated_off, i, "val should be equal i");
+	}
+
+cleanup:
+	close(prog_fd);
+	close(map_fd);
+}
+
+/*
+ * An insn array can be used by one BPF program only: loading a second
+ * program with the same map must fail with -EBUSY
+ */
+static void check_no_map_reuse(void)
+{
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	int prog_fd = -1, extra_fd = -1, map_fd, i;
+	struct bpf_insn_array_value ent = {};
+
+	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+	if (!ASSERT_GE(map_fd, 0, "map_create"))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(insns); i++) {
+		ent.orig_off = i;
+		if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &ent, 0), 0, "bpf_map_update_elem"))
+			goto cleanup;
+	}
+
+	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+		goto cleanup;
+
+	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+	if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(insns); i++) {
+		if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &ent), 0, "bpf_map_lookup_elem"))
+			goto cleanup;
+		ASSERT_EQ(ent.xlated_off, i, "val should be equal i");
+	}
+
+	extra_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+	if (!ASSERT_EQ(extra_fd, -EBUSY, "program should have been rejected (extra_fd != -EBUSY)"))
+		goto cleanup;
+
+	/* correctness: check that prog is still loadable without fd_array */
+	extra_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+	ASSERT_GE(extra_fd, 0, "bpf(BPF_PROG_LOAD): expected no error");
+
+cleanup:
+	close(extra_fd);
+	close(prog_fd);
+	close(map_fd);
+}
+
+/* A BPF program must not be able to call map_lookup_elem() on an insn array */
+static void check_bpf_no_lookup(void)
+{
+	struct bpf_insn insns[] = {
+		BPF_LD_MAP_FD(BPF_REG_1, 0),
+		BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_EXIT_INSN(),
+	};
+	int prog_fd = -1, map_fd;
+
+	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+	if (!ASSERT_GE(map_fd, 0, "map_create"))
+		return;
+	insns[0].imm = map_fd;
+
+	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+		goto cleanup;
+
+	prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+	if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+		goto cleanup;
+
+	/* correctness: check that prog is still loadable with normal map */
+	close(map_fd);
+	map_fd = map_create(BPF_MAP_TYPE_ARRAY, 1);
+	if (!ASSERT_GE(map_fd, 0, "map_create"))
+		goto cleanup;
+	insns[0].imm = map_fd;
+	prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+	ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)");
+cleanup:
+	close(prog_fd);
+	close(map_fd);
+}
+
+static void check_bpf_side(void)
+{
+	check_bpf_no_lookup();	/* the only BPF-side check in this group */
+}
+
+/*
+ * Run the insn array subtests in a fixed order.  An entry is executed only
+ * when test__start_subtest() returns true for its name.
+ */
+static void __test_bpf_insn_array(void)
+{
+	static const struct {
+		/* name handed to test__start_subtest() */
+		const char *name;
+		void (*run)(void);
+	} subtests[] = {
+		/* Test if offsets are adjusted properly */
+		{ "one2one",			check_one_to_one_mapping },
+		{ "simple",			check_simple },
+		{ "deletions",			check_deletions },
+		{ "deletions-with-functions",	check_deletions_with_functions },
+		{ "blindness",			check_blindness },
+
+		/* Check all kinds of operations and related restrictions */
+		{ "incorrect-index",		check_incorrect_index },
+		{ "load-unfrozen-map",		check_load_unfrozen_map },
+		{ "no-map-reuse",		check_no_map_reuse },
+		{ "bpf-side-ops",		check_bpf_side },
+	};
+	size_t n;
+
+	for (n = 0; n < ARRAY_SIZE(subtests); n++) {
+		if (!test__start_subtest(subtests[n].name))
+			continue;
+
+		subtests[n].run();
+	}
+}
+#else
+static void __test_bpf_insn_array(void)
+{
+	test__skip();	/* the real subtests are compiled for x86_64 only */
+}
+#endif
+
+void test_bpf_insn_array(void)
+{
+	__test_bpf_insn_array();	/* skips itself unless built for x86_64 */
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 8a9ba4292109..054ecb6b1e9f 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -7496,6 +7496,71 @@ static struct btf_dedup_test dedup_tests[] = {
 	},
 },
 {
+	.descr = "dedup: recursive typedef",
+	/*
+	 * This test simulates a recursive typedef, which in Go is defined as such:
+	 *
+	 *   type Foo func() Foo
+	 *
+	 * In BTF terms, this is represented as a TYPEDEF referencing
+	 * a FUNC_PROTO that returns the same TYPEDEF.
+	 */
+	.input = {
+		.raw_types = {
+			/*
+			 * [1] typedef Foo -> func() Foo
+			 * [2] func_proto() -> Foo
+			 * [3] typedef Foo -> func() Foo
+			 * [4] func_proto() -> Foo
+			 */
+			BTF_TYPEDEF_ENC(NAME_NTH(1), 2),	/* [1] */
+			BTF_FUNC_PROTO_ENC(1, 0),		/* [2] */
+			BTF_TYPEDEF_ENC(NAME_NTH(1), 4),	/* [3] */
+			BTF_FUNC_PROTO_ENC(3, 0),		/* [4] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0Foo"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPEDEF_ENC(NAME_NTH(1), 2),	/* [1] */
+			BTF_FUNC_PROTO_ENC(1, 0),		/* [2] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0Foo"),
+	},
+},
+{
+	.descr = "dedup: typedef",
+    /*
+     * // CU 1:
+     * typedef int foo;
+     *
+     * // CU 2:
+     * typedef int foo;
+     */
+	.input = {
+		.raw_types = {
+			/* CU 1 */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_TYPEDEF_ENC(NAME_NTH(1), 1),		/* [2] */
+			/* CU 2 */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [3] */
+			BTF_TYPEDEF_ENC(NAME_NTH(1), 3),		/* [4] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0foo"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_TYPEDEF_ENC(NAME_NTH(1), 1),		/* [2] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0foo"),
+	},
+},
+{
 	.descr = "dedup: typedef tags",
 	.input = {
 		.raw_types = {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
index 3696fb9a05ed..2d47cad50a51 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -12,11 +12,45 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
 	vfprintf(ctx, fmt, args);
 }
 
+/* Write raw BTF to a mkstemp() file; return bytes written or negative errno (file removed) */
+static ssize_t btf_raw_write(struct btf *btf, char *file)
+{
+	ssize_t written = 0;
+	const void *data;
+	__u32 size = 0;
+	int fd, ret, err = 0;
+
+	fd = mkstemp(file);
+	if (!ASSERT_GE(fd, 0, "create_file"))
+		return -errno;
+
+	data = btf__raw_data(btf, &size);
+	if (!ASSERT_OK_PTR(data, "btf__raw_data"))
+		err = -EINVAL;
+	while (!err && written < size) {
+		ret = write(fd, data + written, size - written);
+		if (!ASSERT_GE(ret, 0, "write succeeded"))
+			err = -errno;	/* captured before close() can touch errno */
+		else
+			written += ret;
+	}
+	close(fd);
+	/* callers only unlink after a successful write, so drop the file here on failure */
+	if (err)
+		unlink(file);
+	return err ? err : written;
+}
+
 static void __test_btf_split(bool multi)
 {
+	char multisplit_btf_file[] = "/tmp/test_btf_multisplit.XXXXXX";
+	char split_btf_file[] = "/tmp/test_btf_split.XXXXXX";
+	char base_btf_file[] = "/tmp/test_btf_base.XXXXXX";
+	ssize_t multisplit_btf_sz = 0, split_btf_sz = 0, base_btf_sz = 0;
 	struct btf_dump *d = NULL;
-	const struct btf_type *t;
-	struct btf *btf1, *btf2, *btf3 = NULL;
+	const struct btf_type *t, *ot;
+	struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL;
+	struct btf *btf4 = NULL, *btf5 = NULL, *btf6 = NULL;
 	int str_off, i, err;
 
 	btf1 = btf__new_empty();
@@ -123,6 +157,45 @@ static void __test_btf_split(bool multi)
 "	int uf2;\n"
 "};\n\n", "c_dump");
 
+	/* write base, split BTFs to files and ensure parsing succeeds */
+	base_btf_sz = btf_raw_write(btf1, base_btf_file);
+	if (base_btf_sz < 0)
+		goto cleanup;
+	split_btf_sz = btf_raw_write(btf2, split_btf_file);
+	if (split_btf_sz < 0)
+		goto cleanup;
+	btf4 = btf__parse(base_btf_file, NULL);
+	if (!ASSERT_OK_PTR(btf4, "parse_base"))
+		goto cleanup;
+	btf5 = btf__parse_split(split_btf_file, btf4);
+	if (!ASSERT_OK_PTR(btf5, "parse_split"))
+		goto cleanup;
+	if (multi) {
+		multisplit_btf_sz = btf_raw_write(btf3, multisplit_btf_file);
+		if (multisplit_btf_sz < 0)
+			goto cleanup;
+		btf6 = btf__parse_split(multisplit_btf_file, btf5);
+		if (!ASSERT_OK_PTR(btf6, "parse_multisplit"))
+			goto cleanup;
+	} else {
+		btf6 = btf5;
+	}
+
+	if (!ASSERT_EQ(btf__type_cnt(btf3), btf__type_cnt(btf6), "cmp_type_cnt"))
+		goto cleanup;
+
+	/* compare parsed to original BTF */
+	for (i = 1; i < btf__type_cnt(btf6); i++) {
+		t = btf__type_by_id(btf6, i);
+		if (!ASSERT_OK_PTR(t, "type_in_parsed_btf"))
+			goto cleanup;
+		ot = btf__type_by_id(btf3, i);
+		if (!ASSERT_OK_PTR(ot, "type_in_orig_btf"))
+			goto cleanup;
+		if (!ASSERT_EQ(memcmp(t, ot, sizeof(*ot)), 0, "cmp_parsed_orig_btf"))
+			goto cleanup;
+	}
+
 cleanup:
 	if (dump_buf_file)
 		fclose(dump_buf_file);
@@ -132,6 +205,16 @@ cleanup:
 	btf__free(btf2);
 	if (btf2 != btf3)
 		btf__free(btf3);
+	btf__free(btf4);
+	btf__free(btf5);
+	if (btf5 != btf6)
+		btf__free(btf6);
+	if (base_btf_sz > 0)
+		unlink(base_btf_file);
+	if (split_btf_sz > 0)
+		unlink(split_btf_file);
+	if (multisplit_btf_sz > 0)
+		unlink(multisplit_btf_file);
 }
 
 void test_btf_split(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index 2a9a30650350..65b4512967e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -153,6 +153,26 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel,
 	ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user");
 }
 
+static void test_chk_segs_flag(struct test_check_mtu *skel, __u32 mtu)
+{
+	/* skb context handed to the test run: only gso_size is set */
+	struct __sk_buff ctx = { .gso_size = 10 };
+	int fd = bpf_program__fd(skel->progs.tc_chk_segs_flag);
+	int ret;
+	LIBBPF_OPTS(bpf_test_run_opts, run_opts,
+		    .data_in = &pkt_v4,
+		    .data_size_in = sizeof(pkt_v4),
+		    .ctx_in = &ctx,
+		    .ctx_size_in = sizeof(ctx),
+	);
+
+	/* Lower lo's mtu for the BPF_MTU_CHK_SEGS run, then restore the caller's value */
+	SYS_NOFAIL("ip link set dev lo mtu 10");
+	ret = bpf_prog_test_run_opts(fd, &run_opts);
+	SYS_NOFAIL("ip link set dev lo mtu %u", mtu);
+	ASSERT_OK(ret, "test_run");
+	ASSERT_EQ(run_opts.retval, BPF_OK, "retval");
+}
 
 static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
 {
@@ -177,11 +197,12 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
 	test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu);
 	test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu);
 	test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu);
+	test_chk_segs_flag(skel, mtu);
 cleanup:
 	test_check_mtu__destroy(skel);
 }
 
-void serial_test_check_mtu(void)
+void test_ns_check_mtu(void)
 {
 	int mtu_lo;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index 34b59f6baca1..7488a7606e6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -22,79 +22,37 @@
 
 static int duration = 0;
 
-struct addr_port {
-	in_port_t port;
-	union {
-		struct in_addr in_addr;
-		struct in6_addr in6_addr;
-	};
-};
-
-struct tuple {
-	int family;
-	struct addr_port src;
-	struct addr_port dst;
-};
-
-static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
-{
-	const struct sockaddr_in6 *in6;
-	const struct sockaddr_in *in;
-
-	switch (sa->sa_family) {
-	case AF_INET:
-		in = (const struct sockaddr_in *)sa;
-		ap->in_addr = in->sin_addr;
-		ap->port = in->sin_port;
-		return true;
-
-	case AF_INET6:
-		in6 = (const struct sockaddr_in6 *)sa;
-		ap->in6_addr = in6->sin6_addr;
-		ap->port = in6->sin6_port;
-		return true;
-
-	default:
-		return false;
-	}
-}
 
-static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
-			int *server, int *conn, struct tuple *tuple)
+static bool set_up_conn(const struct sockaddr_storage *addr, socklen_t len, int type,
+			int *server, int *conn,
+			struct sockaddr_storage *src,
+			struct sockaddr_storage *dst)
 {
 	struct sockaddr_storage ss;
 	socklen_t slen = sizeof(ss);
-	struct sockaddr *sa = (struct sockaddr *)&ss;
 
-	*server = start_server_addr(type, (struct sockaddr_storage *)addr, len, NULL);
+	*server = start_server_addr(type, addr, len, NULL);
 	if (*server < 0)
 		return false;
 
-	if (CHECK_FAIL(getsockname(*server, sa, &slen)))
+	if (CHECK_FAIL(getsockname(*server, (struct sockaddr *)&ss, &slen)))
 		goto close_server;
 
-	*conn = connect_to_addr(type, (struct sockaddr_storage *)sa, slen, NULL);
+	*conn = connect_to_addr(type, &ss, slen, NULL);
 	if (*conn < 0)
 		goto close_server;
 
 	/* We want to simulate packets arriving at conn, so we have to
 	 * swap src and dst.
 	 */
-	slen = sizeof(ss);
-	if (CHECK_FAIL(getsockname(*conn, sa, &slen)))
-		goto close_conn;
-
-	if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst)))
+	slen = sizeof(*dst);
+	if (CHECK_FAIL(getsockname(*conn, (struct sockaddr *)dst, &slen)))
 		goto close_conn;
 
-	slen = sizeof(ss);
-	if (CHECK_FAIL(getpeername(*conn, sa, &slen)))
+	slen = sizeof(*src);
+	if (CHECK_FAIL(getpeername(*conn, (struct sockaddr *)src, &slen)))
 		goto close_conn;
 
-	if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src)))
-		goto close_conn;
-
-	tuple->family = ss.ss_family;
 	return true;
 
 close_conn:
@@ -110,17 +68,16 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
 {
 	struct sockaddr_in *addr4;
 	struct sockaddr_in6 *addr6;
+	memset(addr, 0, sizeof(*addr));
 
 	switch (family) {
 	case AF_INET:
 		addr4 = (struct sockaddr_in *)addr;
-		memset(addr4, 0, sizeof(*addr4));
 		addr4->sin_family = family;
 		addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 		return sizeof(*addr4);
 	case AF_INET6:
 		addr6 = (struct sockaddr_in6 *)addr;
-		memset(addr6, 0, sizeof(*addr6));
 		addr6->sin6_family = family;
 		addr6->sin6_addr = in6addr_loopback;
 		return sizeof(*addr6);
@@ -242,9 +199,15 @@ static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto)
 }
 
 static size_t build_input(const struct test_cfg *test, void *const buf,
-			  const struct tuple *tuple)
+			  const struct sockaddr_storage *src,
+			  const struct sockaddr_storage *dst)
 {
-	in_port_t sport = tuple->src.port;
+	struct sockaddr_in6 *src_in6 = (struct sockaddr_in6 *)src;
+	struct sockaddr_in6 *dst_in6 = (struct sockaddr_in6 *)dst;
+	struct sockaddr_in *src_in = (struct sockaddr_in *)src;
+	struct sockaddr_in *dst_in = (struct sockaddr_in *)dst;
+	sa_family_t family = src->ss_family;
+	in_port_t sport, dport;
 	encap_headers_t encap;
 	struct iphdr ip;
 	struct ipv6hdr ipv6;
@@ -254,8 +217,11 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
 	uint8_t *p = buf;
 	int proto;
 
+	sport = (family == AF_INET) ? src_in->sin_port : src_in6->sin6_port;
+	dport = (family == AF_INET) ? dst_in->sin_port : dst_in6->sin6_port;
+
 	proto = IPPROTO_IPIP;
-	if (tuple->family == AF_INET6)
+	if (family == AF_INET6)
 		proto = IPPROTO_IPV6;
 
 	encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto);
@@ -270,15 +236,15 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
 	if (test->type == UDP)
 		proto = IPPROTO_UDP;
 
-	switch (tuple->family) {
+	switch (family) {
 	case AF_INET:
 		ip = (struct iphdr){
 			.ihl = 5,
 			.version = 4,
 			.ttl = IPDEFTTL,
 			.protocol = proto,
-			.saddr = tuple->src.in_addr.s_addr,
-			.daddr = tuple->dst.in_addr.s_addr,
+			.saddr = src_in->sin_addr.s_addr,
+			.daddr = dst_in->sin_addr.s_addr,
 		};
 		p = mempcpy(p, &ip, sizeof(ip));
 		break;
@@ -287,8 +253,8 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
 			.version = 6,
 			.hop_limit = IPDEFTTL,
 			.nexthdr = proto,
-			.saddr = tuple->src.in6_addr,
-			.daddr = tuple->dst.in6_addr,
+			.saddr = src_in6->sin6_addr,
+			.daddr = dst_in6->sin6_addr,
 		};
 		p = mempcpy(p, &ipv6, sizeof(ipv6));
 		break;
@@ -303,18 +269,16 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
 	case TCP:
 		tcp = (struct tcphdr){
 			.source = sport,
-			.dest = tuple->dst.port,
+			.dest = dport,
+			.syn = (test->flags == SYN),
+			.ack = (test->flags == ACK),
 		};
-		if (test->flags == SYN)
-			tcp.syn = true;
-		if (test->flags == ACK)
-			tcp.ack = true;
 		p = mempcpy(p, &tcp, sizeof(tcp));
 		break;
 	case UDP:
 		udp = (struct udphdr){
 			.source = sport,
-			.dest = tuple->dst.port,
+			.dest = dport,
 		};
 		p = mempcpy(p, &udp, sizeof(udp));
 		break;
@@ -339,27 +303,26 @@ static void test_cls_redirect_common(struct bpf_program *prog)
 	LIBBPF_OPTS(bpf_test_run_opts, tattr);
 	int families[] = { AF_INET, AF_INET6 };
 	struct sockaddr_storage ss;
-	struct sockaddr *addr;
 	socklen_t slen;
 	int i, j, err, prog_fd;
 	int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
 	int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
-	struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
+	struct sockaddr_storage srcs[__NR_KIND][ARRAY_SIZE(families)];
+	struct sockaddr_storage dsts[__NR_KIND][ARRAY_SIZE(families)];
 
-	addr = (struct sockaddr *)&ss;
 	for (i = 0; i < ARRAY_SIZE(families); i++) {
 		slen = prepare_addr(&ss, families[i]);
 		if (CHECK_FAIL(!slen))
 			goto cleanup;
 
-		if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM,
+		if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_DGRAM,
 					    &servers[UDP][i], &conns[UDP][i],
-					    &tuples[UDP][i])))
+					    &srcs[UDP][i], &dsts[UDP][i])))
 			goto cleanup;
 
-		if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM,
+		if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_STREAM,
 					    &servers[TCP][i], &conns[TCP][i],
-					    &tuples[TCP][i])))
+					    &srcs[TCP][i], &dsts[TCP][i])))
 			goto cleanup;
 	}
 
@@ -368,11 +331,12 @@ static void test_cls_redirect_common(struct bpf_program *prog)
 		struct test_cfg *test = &tests[i];
 
 		for (j = 0; j < ARRAY_SIZE(families); j++) {
-			struct tuple *tuple = &tuples[test->type][j];
+			struct sockaddr_storage *src = &srcs[test->type][j];
+			struct sockaddr_storage *dst = &dsts[test->type][j];
 			char input[256];
 			char tmp[256];
 
-			test_str(tmp, sizeof(tmp), test, tuple->family);
+			test_str(tmp, sizeof(tmp), test, families[j]);
 			if (!test__start_subtest(tmp))
 				continue;
 
@@ -380,7 +344,7 @@ static void test_cls_redirect_common(struct bpf_program *prog)
 			tattr.data_size_out = sizeof(tmp);
 
 			tattr.data_in = input;
-			tattr.data_size_in = build_input(test, input, tuple);
+			tattr.data_size_in = build_input(test, input, src, dst);
 			if (CHECK_FAIL(!tattr.data_size_in))
 				continue;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/file_reader.c b/tools/testing/selftests/bpf/prog_tests/file_reader.c
new file mode 100644
index 000000000000..5cde32b35da4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/file_reader.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "file_reader.skel.h"
+#include "file_reader_fail.skel.h"
+#include <dlfcn.h>
+#include <sys/mman.h>
+
+const char *user_ptr = "hello world";
+char file_contents[256000];
+
+/* Return the dladdr() base address of the object holding this function, or NULL. */
+void *get_executable_base_addr(void)
+{
+	Dl_info dl;
+
+	if (dladdr((void *)&get_executable_base_addr, &dl))
+		return dl.dli_fbase;
+
+	fprintf(stderr, "dladdr failed\n");
+	return NULL;
+}
+
+/* Snapshot the test binary into file_contents and page out its mapping; 0 on success */
+static int initialize_file_contents(void)
+{
+	int fd, page_sz = sysconf(_SC_PAGESIZE);
+	ssize_t n = 0, cur, off;
+	void *addr;
+
+	fd = open("/proc/self/exe", O_RDONLY);
+	if (!ASSERT_OK_FD(fd, "Open /proc/self/exe"))
+		return 1;
+
+	do {
+		cur = read(fd, file_contents + n, sizeof(file_contents) - n);
+		if (!ASSERT_GT(cur, 0, "read success"))
+			break;
+		n += cur;
+	} while (n < sizeof(file_contents));
+	close(fd);
+
+	if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe"))
+		return 1;
+
+	addr = get_executable_base_addr();
+	if (!ASSERT_NEQ(addr, NULL, "get executable address"))
+		return 1;
+
+	/* page-align base file address */
+	addr = (void *)((unsigned long)addr & ~(page_sz - 1));
+
+	/*
+	 * Page out range 0..512K, use 0..256K for positive tests and
+	 * 256K..512K for negative tests expecting page faults
+	 */
+	for (off = 0; off < sizeof(file_contents) * 2; off += page_sz) {
+		if (!ASSERT_OK(madvise(addr + off, page_sz, MADV_PAGEOUT),
+			       "madvise pageout"))
+			return 1;
+	}
+
+	return 0;
+}
+
+/* Run one subtest: load only @prog_name, open our own binary, check the BPF-side flags */
+static void run_test(const char *prog_name)
+{
+	struct bpf_program *p;
+	struct file_reader *skel;
+	int ret, exe_fd;
+
+	ret = initialize_file_contents();
+	if (!ASSERT_OK(ret, "initialize file contents"))
+		return;
+
+	skel = file_reader__open();
+	if (!ASSERT_OK_PTR(skel, "file_reader__open"))
+		return;
+
+	bpf_object__for_each_program(p, skel->obj)
+		bpf_program__set_autoload(p, !strcmp(bpf_program__name(p), prog_name));
+
+	skel->bss->pid = getpid();
+	memcpy(skel->bss->user_buf, file_contents, sizeof(file_contents));
+
+	ret = file_reader__load(skel);
+	if (!ASSERT_OK(ret, "file_reader__load"))
+		goto cleanup;
+	ret = file_reader__attach(skel);
+	if (!ASSERT_OK(ret, "file_reader__attach"))
+		goto cleanup;
+
+	/* only the open() matters (presumably it fires the attached program); fd unused */
+	exe_fd = open("/proc/self/exe", O_RDONLY);
+	if (exe_fd >= 0)
+		close(exe_fd);
+
+	ASSERT_EQ(skel->bss->err, 0, "err");
+	ASSERT_EQ(skel->bss->run_success, 1, "run_success");
+cleanup:
+	file_reader__destroy(skel);
+}
+
+void test_file_reader(void)
+{
+	const char *progs[] = { "on_open_expect_fault", "on_open_validate_file_read" };
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(progs); i++)
+		if (test__start_subtest(progs[i]))
+			run_test(progs[i]);
+	if (test__start_subtest("negative"))
+		RUN_TESTS(file_reader_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c
index 2bc85f4814f4..d0b405eb2966 100644
--- a/tools/testing/selftests/bpf/prog_tests/htab_update.c
+++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c
@@ -15,17 +15,17 @@ struct htab_update_ctx {
 static void test_reenter_update(void)
 {
 	struct htab_update *skel;
-	unsigned int key, value;
+	void *value = NULL;
+	unsigned int key, value_size;
 	int err;
 
 	skel = htab_update__open();
 	if (!ASSERT_OK_PTR(skel, "htab_update__open"))
 		return;
 
-	/* lookup_elem_raw() may be inlined and find_kernel_btf_id() will return -ESRCH */
-	bpf_program__set_autoload(skel->progs.lookup_elem_raw, true);
+	bpf_program__set_autoload(skel->progs.bpf_obj_free_fields, true);
 	err = htab_update__load(skel);
-	if (!ASSERT_TRUE(!err || err == -ESRCH, "htab_update__load") || err)
+	if (!ASSERT_TRUE(!err, "htab_update__load") || err)
 		goto out;
 
 	skel->bss->pid = getpid();
@@ -33,14 +33,34 @@ static void test_reenter_update(void)
 	if (!ASSERT_OK(err, "htab_update__attach"))
 		goto out;
 
-	/* Will trigger the reentrancy of bpf_map_update_elem() */
+	value_size = bpf_map__value_size(skel->maps.htab);
+
+	value = calloc(1, value_size);
+	if (!ASSERT_OK_PTR(value, "calloc value"))
+		goto out;
+	/*
+	 * First update: plain insert. This should NOT trigger the re-entrancy
+	 * path, because there is no old element to free yet.
+	 */
 	key = 0;
-	value = 0;
-	err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, &value, 0);
-	if (!ASSERT_OK(err, "add element"))
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
+	if (!ASSERT_OK(err, "first update (insert)"))
+		goto out;
+
+	/*
+	 * Second update: replace existing element with same key and trigger
+	 * the reentrancy of bpf_map_update_elem().
+	 * check_and_free_fields() calls bpf_obj_free_fields() on the old
+	 * value, which is where fentry program runs and performs a nested
+	 * bpf_map_update_elem(), triggering -EDEADLK.
+	 */
+	memset(value, 0, value_size);
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
+	if (!ASSERT_OK(err, "second update (replace)"))
 		goto out;
 
-	ASSERT_EQ(skel->bss->update_err, -EBUSY, "no reentrancy");
+	ASSERT_EQ(skel->bss->update_err, -EDEADLK, "no reentrancy");
 out:
 	htab_update__destroy(skel);
+	free(value);	/* value is still NULL (a no-op here) on paths that never reached calloc() */
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
index 1de14b111931..6e35e13c2022 100644
--- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
@@ -57,7 +57,8 @@ static void subtest_kmem_cache_iter_check_slabinfo(struct kmem_cache_iter *skel)
 		if (!ASSERT_OK(ret, "kmem_cache_lookup"))
 			break;
 
-		ASSERT_STREQ(r.name, name, "kmem_cache_name");
+		ASSERT_STRNEQ(r.name, name, sizeof(r.name) - 1,
+			      "kmem_cache_name");
 		ASSERT_EQ(r.obj_size, objsize, "kmem_cache_objsize");
 
 		seen++;
diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
new file mode 100644
index 000000000000..72aa5376c30e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "testing_helpers.h"
+#include "livepatch_trampoline.skel.h"
+
+static int load_livepatch(void)
+{
+	const char *kbuild_dir = getenv("KBUILD_OUTPUT");
+	char ko_path[4096];
+
+	/* CI will set KBUILD_OUTPUT; otherwise use a path relative to the cwd */
+	snprintf(ko_path, sizeof(ko_path), "%s/samples/livepatch/livepatch-sample.ko",
+		 kbuild_dir ? kbuild_dir : "../../../..");
+	return load_module(ko_path, env_verbosity > VERBOSE_NONE);
+}
+
+/* Disable the sample livepatch through sysfs, then unload its module. */
+static void unload_livepatch(void)
+{
+	system("echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled");
+
+	unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE);
+}
+
+/* Read /proc/cmdline and check that it starts with the livepatched text */
+static void read_proc_cmdline(void)
+{
+	static const char patched[] = "this has been live patched";
+	char buf[4096] = {};	/* zeroed: a short read must not compare stack garbage */
+	int fd, ret;
+
+	fd = open("/proc/cmdline", O_RDONLY);
+	if (!ASSERT_OK_FD(fd, "open /proc/cmdline"))
+		return;
+
+	ret = read(fd, buf, sizeof(buf));
+	if (!ASSERT_GT(ret, 0, "read /proc/cmdline"))
+		goto out;
+	ASSERT_OK(strncmp(buf, patched, sizeof(patched) - 1), "strncmp");
+out:
+	close(fd);
+}
+
+static void __test_livepatch_trampoline(bool fexit_first)
+{
+	struct livepatch_trampoline *skel = NULL;
+	int err;
+
+	skel = livepatch_trampoline__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		goto out;
+
+	skel->bss->my_pid = getpid();
+
+	if (!fexit_first) {
+		/* Attach via the skeleton: fentry program is attached first by default */
+		err = livepatch_trampoline__attach(skel);
+		if (!ASSERT_OK(err, "skel_attach"))
+			goto out;
+	} else {
+		/* Manually attach fexit program first, then fentry. */
+		skel->links.fexit_cmdline = bpf_program__attach(skel->progs.fexit_cmdline);
+		if (!ASSERT_OK_PTR(skel->links.fexit_cmdline, "attach_fexit"))
+			goto out;
+
+		skel->links.fentry_cmdline = bpf_program__attach(skel->progs.fentry_cmdline);
+		if (!ASSERT_OK_PTR(skel->links.fentry_cmdline, "attach_fentry"))
+			goto out;
+	}
+	/* each program is expected to record exactly one hit after this read */
+	read_proc_cmdline();
+
+	ASSERT_EQ(skel->bss->fentry_hit, 1, "fentry_hit");
+	ASSERT_EQ(skel->bss->fexit_hit, 1, "fexit_hit");
+out:
+	livepatch_trampoline__destroy(skel);
+}
+
+/* Load the sample livepatch module, run both attach orders, then unload it. */
+void test_livepatch_trampoline(void)
+{
+	int err;
+
+	err = load_livepatch();
+	if (err) {
+		/*
+		 * Something else (previous run of the same test?) loaded
+		 * the KLP module. Unload the KLP module and retry once.
+		 */
+		unload_livepatch();
+		err = load_livepatch();
+	}
+	if (err) {
+		ASSERT_OK(1, "load_livepatch");
+		goto out;
+	}
+
+	if (test__start_subtest("fentry_first"))
+		__test_livepatch_trampoline(false);
+
+	if (test__start_subtest("fexit_first"))
+		__test_livepatch_trampoline(true);
+out:
+	unload_livepatch();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index f8eb7f9d4fd2..8fade8bdc451 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -6,11 +6,13 @@
 #include <netinet/in.h>
 #include <test_progs.h>
 #include <unistd.h>
+#include <errno.h>
 #include "cgroup_helpers.h"
 #include "network_helpers.h"
 #include "mptcp_sock.skel.h"
 #include "mptcpify.skel.h"
 #include "mptcp_subflow.skel.h"
+#include "mptcp_sockmap.skel.h"
 
 #define NS_TEST "mptcp_ns"
 #define ADDR_1	"10.0.1.1"
@@ -436,6 +438,142 @@ close_cgroup:
 	close(cgroup_fd);
 }
 
+/* Test sockmap redirect on an MPTCP server handling non-mp-capable (fallback) clients. */
+static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel)
+{
+	int listen_fd = -1, client_fd1 = -1, client_fd2 = -1;
+	int server_fd1 = -1, server_fd2 = -1, sent, recvd;
+	char snd[9] = "123456789";	/* 9 payload bytes, no NUL terminator */
+	char rcv[10];	/* one byte larger than snd */
+
+	/* start server with MPTCP enabled */
+	listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+	if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server"))
+		return;
+
+	skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+	skel->bss->sk_index = 0;
+	/* create first client without MPTCP enabled (sk_index is 0 at this point) */
+	client_fd1 = connect_to_fd_opts(listen_fd, NULL);
+	if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd"))
+		goto end;
+
+	server_fd1 = accept(listen_fd, NULL, 0);
+	skel->bss->sk_index = 1;
+	client_fd2 = connect_to_fd_opts(listen_fd, NULL);
+	if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd"))
+		goto end;
+
+	server_fd2 = accept(listen_fd, NULL, 0);
+	/* test normal redirect behavior: with redirect_idx set to 1, data sent
+	 * by client_fd1 can be received by client_fd2
+	 */
+	skel->bss->redirect_idx = 1;
+	sent = send(client_fd1, snd, sizeof(snd), 0);
+	if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)"))
+		goto end;
+
+	/* ask for sizeof(rcv) > sizeof(snd) so recvd == sizeof(snd) means exactly 9 bytes */
+	recvd = recv(client_fd2, rcv, sizeof(rcv), 0);
+	if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)"))
+		goto end;
+
+end:
+	if (client_fd1 >= 0)
+		close(client_fd1);
+	if (client_fd2 >= 0)
+		close(client_fd2);
+	if (server_fd1 >= 0)
+		close(server_fd1);
+	if (server_fd2 >= 0)
+		close(server_fd2);
+	close(listen_fd);
+}
+
+/* Test sockmap rejection of MPTCP sockets - both server and client sides. */
+static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel)
+{
+	int listen_fd = -1, server_fd = -1, client_fd1 = -1;
+	int err, zero = 0;
+
+	/* start server with MPTCP enabled */
+	listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+	if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server"))
+		return;
+
+	skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+	skel->bss->sk_index = 0;
+	/* create client with MPTCP enabled */
+	client_fd1 = connect_to_fd(listen_fd, 0);
+	if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1"))
+		goto end;
+
+	/* BPF side: bpf_sock_map_update() called from sockops should reject MPTCP sk */
+	if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject"))
+		goto end;
+	/* user space: inserting the accepted MPTCP socket into sock_map must fail too */
+	server_fd = accept(listen_fd, NULL, 0);
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+				  &zero, &server_fd, BPF_NOEXIST);
+	if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed"))
+		goto end;
+
+	/* MPTCP client should also be disallowed */
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+				  &zero, &client_fd1, BPF_NOEXIST);
+	if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed"))
+		goto end;
+end:
+	if (client_fd1 >= 0)
+		close(client_fd1);
+	if (server_fd >= 0)
+		close(server_fd);
+	close(listen_fd);
+}
+
+static void test_mptcp_sockmap(void)
+{
+	struct mptcp_sockmap *skel;
+	struct netns_obj *netns;
+	int cgroup_fd, err;
+
+	cgroup_fd = test__join_cgroup("/mptcp_sockmap");
+	if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap"))
+		return;
+
+	skel = mptcp_sockmap__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap"))
+		goto close_cgroup;
+	/* attach mptcp_sockmap_inject to the test cgroup joined above */
+	skel->links.mptcp_sockmap_inject =
+		bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap"))
+		goto skel_destroy;
+	/* attach mptcp_sockmap_redirect to sock_map as its stream verdict program */
+	err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect),
+			      bpf_map__fd(skel->maps.sock_map),
+			      BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+		goto skel_destroy;
+	/* create the NS_TEST netns (second argument presumably enters it - TODO confirm) */
+	netns = netns_new(NS_TEST, true);
+	if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap"))
+		goto skel_destroy;
+
+	if (endpoint_init("subflow") < 0)
+		goto close_netns;
+
+	test_sockmap_with_mptcp_fallback(skel);
+	test_sockmap_reject_mptcp(skel);
+
+close_netns:
+	netns_free(netns);
+skel_destroy:
+	mptcp_sockmap__destroy(skel);
+close_cgroup:
+	close(cgroup_fd);
+}
+
 void test_mptcp(void)
 {
 	if (test__start_subtest("base"))
@@ -444,4 +582,6 @@ void test_mptcp(void)
 		test_mptcpify();
 	if (test__start_subtest("subflow"))
 		test_subflow();
+	if (test__start_subtest("sockmap"))
+		test_mptcp_sockmap();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index bc24f83339d6..0a7ef770c487 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -15,6 +15,10 @@ static void check_good_sample(struct test_perf_branches *skel)
 	int pbe_size = sizeof(struct perf_branch_entry);
 	int duration = 0;
 
+	if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
+		  "checked sample validity before prog run"))
+		return;
+
 	if (CHECK(!skel->bss->valid, "output not valid",
 		 "no valid sample from prog"))
 		return;
@@ -45,6 +49,10 @@ static void check_bad_sample(struct test_perf_branches *skel)
 	int written_stack = skel->bss->written_stack_out;
 	int duration = 0;
 
+	if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
+		  "checked sample validity before prog run"))
+		return;
+
 	if (CHECK(!skel->bss->valid, "output not valid",
 		 "no valid sample from prog"))
 		return;
@@ -83,8 +91,12 @@ static void test_perf_branches_common(int perf_fd,
 	err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
 	if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
 		goto out_destroy;
-	/* spin the loop for a while (random high number) */
-	for (i = 0; i < 1000000; ++i)
+
+	/* Spin the loop for a while by using a high iteration count, and by
+	 * checking whether the specific run count marker has been explicitly
+	 * incremented at least once by the backing perf_event BPF program.
+	 */
+	for (i = 0; i < 100000000 && !*(volatile int *)&skel->bss->run_cnt; ++i)
 		++j;
 
 	test_perf_branches__detach(skel);
@@ -116,11 +128,11 @@ static void test_perf_branches_hw(void)
 	pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
 
 	/*
-	 * Some setups don't support branch records (virtual machines, !x86),
-	 * so skip test in this case.
+	 * Some setups don't support LBR (virtual machines, !x86, AMD Milan Zen
+	 * 3 which only supports BRS), so skip test in this case.
 	 */
 	if (pfd < 0) {
-		if (errno == ENOENT || errno == EOPNOTSUPP) {
+		if (errno == ENOENT || errno == EOPNOTSUPP || errno == EINVAL) {
 			printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
 			       __func__);
 			test__skip();
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
index c9f855e5da24..246eb259c08a 100644
--- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -28,6 +28,7 @@ static void test_success(void)
 	bpf_program__set_autoload(skel->progs.two_regions, true);
 	bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
 	bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+	bpf_program__set_autoload(skel->progs.nested_rcu_region, true);
 	bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
 	bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true);
 	bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true);
@@ -78,7 +79,8 @@ static const char * const inproper_region_tests[] = {
 	"non_sleepable_rcu_mismatch",
 	"inproper_sleepable_helper",
 	"inproper_sleepable_kfunc",
-	"nested_rcu_region",
+	"nested_rcu_region_unbalanced_1",
+	"nested_rcu_region_unbalanced_2",
 	"rcu_read_lock_global_subprog_lock",
 	"rcu_read_lock_global_subprog_unlock",
 	"rcu_read_lock_sleepable_helper_global_subprog",
diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
index d6bd5e16e637..d2c0542716a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
@@ -44,3 +44,59 @@ void test_refcounted_kptr_wrong_owner(void)
 	ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
 	refcounted_kptr__destroy(skel);
 }
+
+/* Rewrite a percpu_hash element around two prog runs and check their retvals. */
+void test_percpu_hash_refcounted_kptr_refcount_leak(void)
+{
+	struct refcounted_kptr *skel;
+	int cpu_nr, fd, err, key = 0;
+	struct bpf_map *map;
+	size_t values_sz;
+	u64 *values;
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		    .data_in = &pkt_v4,
+		    .data_size_in = sizeof(pkt_v4),
+		    .repeat = 1,
+	);
+
+	cpu_nr = libbpf_num_possible_cpus();
+	if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
+		return;
+
+	values_sz = cpu_nr * sizeof(u64);
+	values = calloc(cpu_nr, sizeof(u64));	/* already zero-filled; reused for both updates */
+	if (!ASSERT_OK_PTR(values, "calloc values"))
+		return;
+
+	skel = refcounted_kptr__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
+		free(values);
+		return;
+	}
+
+	map = skel->maps.percpu_hash;
+	err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+	if (!ASSERT_OK(err, "bpf_map__update_elem"))
+		goto out;
+
+	fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
+	err = bpf_prog_test_run_opts(fd, &opts);
+	if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+		goto out;
+	if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
+		goto out;
+
+	/* overwrite the same element again before the check program runs */
+	err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+	if (!ASSERT_OK(err, "bpf_map__update_elem"))
+		goto out;
+
+	fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
+	err = bpf_prog_test_run_opts(fd, &opts);
+	ASSERT_OK(err, "bpf_prog_test_run_opts");
+	ASSERT_EQ(opts.retval, 1, "opts.retval");
+
+out:
+	refcounted_kptr__destroy(skel);
+	free(values);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
index 8c6c2043a432..f0a8c828f8f1 100644
--- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
@@ -110,8 +110,8 @@ void serial_test_res_spin_lock_stress(void)
 	ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA");
 	sleep(5);
 	unload_module("bpf_test_rqspinlock", false);
-
-	ASSERT_OK(load_module_params("bpf_test_rqspinlock.ko", "test_ab=1", false), "load module ABBA");
-	sleep(5);
-	unload_module("bpf_test_rqspinlock", false);
+	/*
+	 * Insert bpf_test_rqspinlock.ko manually with test_mode=[1|2] to test
+	 * other cases (ABBA, ABBCCA).
+	 */
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index d1e4cb28a72c..64520684d2cb 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -17,6 +17,7 @@
 #include "test_ringbuf_n.lskel.h"
 #include "test_ringbuf_map_key.lskel.h"
 #include "test_ringbuf_write.lskel.h"
+#include "test_ringbuf_overwrite.lskel.h"
 
 #define EDONE 7777
 
@@ -497,6 +498,68 @@ cleanup:
 	test_ringbuf_map_key_lskel__destroy(skel_map_key);
 }
 
+/*
+ * Load test_ringbuf_overwrite with a one-page ring and LEN1..LEN5 filled in,
+ * issue getpgid(), then check the reserveN_fail flags and the ring positions
+ * the program left in .bss.
+ */
+static void ringbuf_overwrite_mode_subtest(void)
+{
+	unsigned long want_avail, want_prod_pos, want_over_pos;
+	unsigned long size, len1, len2, len3, len4, len5;
+	struct test_ringbuf_overwrite_lskel *skel;
+	int err, page_size = getpagesize();
+
+	skel = test_ringbuf_overwrite_lskel__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	size = page_size;
+	len1 = page_size / 2;
+	len2 = page_size / 4;
+	len3 = size - len1 - len2 - BPF_RINGBUF_HDR_SZ * 3;
+	len4 = len3 - 8;
+	len5 = len3; /* retry with len3 */
+
+	want_avail = len2 + len4 + len5 + 3 * BPF_RINGBUF_HDR_SZ;
+	want_prod_pos = len1 + len2 + len4 + len5 + 4 * BPF_RINGBUF_HDR_SZ;
+	want_over_pos = len1 + BPF_RINGBUF_HDR_SZ;
+
+	skel->maps.ringbuf.max_entries = size;
+	skel->rodata->LEN1 = len1;
+	skel->rodata->LEN2 = len2;
+	skel->rodata->LEN3 = len3;
+	skel->rodata->LEN4 = len4;
+	skel->rodata->LEN5 = len5;
+	skel->bss->pid = getpid();
+
+	err = test_ringbuf_overwrite_lskel__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	err = test_ringbuf_overwrite_lskel__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	syscall(__NR_getpgid);
+
+	ASSERT_EQ(skel->bss->reserve1_fail, 0, "reserve 1");
+	ASSERT_EQ(skel->bss->reserve2_fail, 0, "reserve 2");
+	ASSERT_EQ(skel->bss->reserve3_fail, 1, "reserve 3");
+	ASSERT_EQ(skel->bss->reserve4_fail, 0, "reserve 4");
+	ASSERT_EQ(skel->bss->reserve5_fail, 0, "reserve 5");
+
+	ASSERT_EQ(skel->bss->ring_size, size, "check_ring_size");
+	ASSERT_EQ(skel->bss->avail_data, want_avail, "check_avail_size");
+	ASSERT_EQ(skel->bss->cons_pos, 0, "check_cons_pos");
+	ASSERT_EQ(skel->bss->prod_pos, want_prod_pos, "check_prod_pos");
+	ASSERT_EQ(skel->bss->over_pos, want_over_pos, "check_over_pos");
+
+	test_ringbuf_overwrite_lskel__detach(skel);
+cleanup:
+	test_ringbuf_overwrite_lskel__destroy(skel);
+}
+
 void test_ringbuf(void)
 {
 	if (test__start_subtest("ringbuf"))
@@ -507,4 +570,6 @@ void test_ringbuf(void)
 		ringbuf_map_key_subtest();
 	if (test__start_subtest("ringbuf_write"))
 		ringbuf_write_subtest();
+	if (test__start_subtest("ringbuf_overwrite_mode"))
+		ringbuf_overwrite_mode_subtest();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 036d4760d2c1..3dbcc091f16c 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -41,11 +41,7 @@ static struct bpf_object *obj;
 static __u32 index_zero;
 static int epfd;
 
-static union sa46 {
-	struct sockaddr_in6 v6;
-	struct sockaddr_in v4;
-	sa_family_t family;
-} srv_sa;
+static struct sockaddr_storage srv_sa;
 
 #define RET_IF(condition, tag, format...) ({				\
 	if (CHECK_FAIL(condition)) {					\
@@ -135,24 +131,24 @@ static int prepare_bpf_obj(void)
 	return 0;
 }
 
-static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
+static void ss_init_loopback(struct sockaddr_storage *sa, sa_family_t family)
 {
 	memset(sa, 0, sizeof(*sa));
-	sa->family = family;
-	if (sa->family == AF_INET6)
-		sa->v6.sin6_addr = in6addr_loopback;
+	sa->ss_family = family;
+	if (sa->ss_family == AF_INET6)
+		((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_loopback;
 	else
-		sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		((struct sockaddr_in *)sa)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 }
 
-static void sa46_init_inany(union sa46 *sa, sa_family_t family)
+static void ss_init_inany(struct sockaddr_storage *sa, sa_family_t family)
 {
 	memset(sa, 0, sizeof(*sa));
-	sa->family = family;
-	if (sa->family == AF_INET6)
-		sa->v6.sin6_addr = in6addr_any;
+	sa->ss_family = family;
+	if (sa->ss_family == AF_INET6)
+		((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_any;
 	else
-		sa->v4.sin_addr.s_addr = INADDR_ANY;
+		((struct sockaddr_in *)sa)->sin_addr.s_addr = INADDR_ANY;
 }
 
 static int read_int_sysctl(const char *sysctl)
@@ -228,7 +224,7 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
 		       int cli_fd)
 {
 	struct data_check expected = {}, result;
-	union sa46 cli_sa;
+	struct sockaddr_storage cli_sa;
 	socklen_t addrlen;
 	int err;
 
@@ -251,26 +247,32 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
 	}
 
 	if (family == AF_INET6) {
+		struct sockaddr_in6 *srv_v6 = (struct sockaddr_in6 *)&srv_sa;
+		struct sockaddr_in6 *cli_v6 = (struct sockaddr_in6 *)&cli_sa;
+
 		expected.eth_protocol = htons(ETH_P_IPV6);
-		expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
-			!srv_sa.v6.sin6_addr.s6_addr32[2] &&
-			!srv_sa.v6.sin6_addr.s6_addr32[1] &&
-			!srv_sa.v6.sin6_addr.s6_addr32[0];
+		expected.bind_inany = !srv_v6->sin6_addr.s6_addr32[3] &&
+			!srv_v6->sin6_addr.s6_addr32[2] &&
+			!srv_v6->sin6_addr.s6_addr32[1] &&
+			!srv_v6->sin6_addr.s6_addr32[0];
 
-		memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
-		       sizeof(cli_sa.v6.sin6_addr));
+		memcpy(&expected.skb_addrs[0], cli_v6->sin6_addr.s6_addr32,
+		       sizeof(cli_v6->sin6_addr));
 		memcpy(&expected.skb_addrs[4], &in6addr_loopback,
 		       sizeof(in6addr_loopback));
-		expected.skb_ports[0] = cli_sa.v6.sin6_port;
-		expected.skb_ports[1] = srv_sa.v6.sin6_port;
+		expected.skb_ports[0] = cli_v6->sin6_port;
+		expected.skb_ports[1] = srv_v6->sin6_port;
 	} else {
+		struct sockaddr_in *srv_v4 = (struct sockaddr_in *)&srv_sa;
+		struct sockaddr_in *cli_v4 = (struct sockaddr_in *)&cli_sa;
+
 		expected.eth_protocol = htons(ETH_P_IP);
-		expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
+		expected.bind_inany = !srv_v4->sin_addr.s_addr;
 
-		expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
+		expected.skb_addrs[0] = cli_v4->sin_addr.s_addr;
 		expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
-		expected.skb_ports[0] = cli_sa.v4.sin_port;
-		expected.skb_ports[1] = srv_sa.v4.sin_port;
+		expected.skb_ports[0] = cli_v4->sin_port;
+		expected.skb_ports[1] = srv_v4->sin_port;
 	}
 
 	if (memcmp(&result, &expected, offsetof(struct data_check,
@@ -364,16 +366,15 @@ static void check_results(void)
 static int send_data(int type, sa_family_t family, void *data, size_t len,
 		     enum result expected)
 {
-	union sa46 cli_sa;
+	struct sockaddr_storage cli_sa;
 	int fd, err;
 
 	fd = socket(family, type, 0);
 	RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
 
-	sa46_init_loopback(&cli_sa, family);
+	ss_init_loopback(&cli_sa, family);
 	err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
 	RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
-
 	err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
 		     sizeof(srv_sa));
 	RET_ERR(err != len && expected >= PASS,
@@ -589,9 +590,9 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
 	socklen_t addrlen;
 
 	if (inany)
-		sa46_init_inany(&srv_sa, family);
+		ss_init_inany(&srv_sa, family);
 	else
-		sa46_init_loopback(&srv_sa, family);
+		ss_init_loopback(&srv_sa, family);
 	addrlen = sizeof(srv_sa);
 
 	/*
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 1702aa592c2c..7ac4d5a488aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -206,6 +206,11 @@ destroy_skel:
 skel_open_load_failure:
 	close(pipe_c2p[0]);
 	close(pipe_p2c[1]);
+	/*
+	 * Child is either about to exit cleanly or stuck in case of errors.
+	 * Nudge it to exit.
+	 */
+	kill(pid, SIGKILL);
 	wait(NULL);
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
new file mode 100644
index 000000000000..e4940583924b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <test_progs.h>
+#include "sk_bypass_prot_mem.skel.h"
+#include "network_helpers.h"
+
+#define NR_PAGES	32	/* growth threshold checked in check_bypass() */
+#define NR_SOCKETS	2	/* size of the sk[] array in check_bypass() */
+#define BUF_TOTAL	(NR_PAGES * 4096 / NR_SOCKETS)	/* bytes sent per socket; also SO_RCVBUF for UDP */
+#define BUF_SINGLE	1024	/* payload of one send() */
+#define NR_SEND		(BUF_TOTAL / BUF_SINGLE)	/* send() calls per socket */
+
+struct test_case {
+	char name[8];	/* subtest name handed to test__start_subtest() */
+	int family;	/* address family given to start_server() */
+	int type;	/* socket type given to start_server() and socket() */
+	int (*create_sockets)(struct test_case *test_case, int sk[], int len);
+	long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel);
+};
+
+/* Fill sk[] with connect_to_fd()/accept() TCP pairs; 0 or the failing call's result. */
+static int tcp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+	int server, i, err = 0;
+
+	server = start_server(test_case->family, test_case->type, NULL, 0, 0);
+	if (!ASSERT_GE(server, 0, "start_server"))
+		return server;
+
+	/* Keep for-loop so we can change NR_SOCKETS easily. */
+	for (i = 0; i < len; i += 2) {
+		sk[i] = connect_to_fd(server, 0);
+		if (sk[i] < 0) {
+			ASSERT_GE(sk[i], 0, "connect_to_fd");
+			err = sk[i];
+			break;
+		}
+
+		sk[i + 1] = accept(server, NULL, NULL);
+		if (sk[i + 1] < 0) {
+			ASSERT_GE(sk[i + 1], 0, "accept");
+			err = sk[i + 1];
+			break;
+		}
+	}
+
+	close(server);
+	return err;
+}
+
+/* Fill sk[] with UDP pairs connected to each other, each with SO_RCVBUF = BUF_TOTAL. */
+static int udp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+	int i, j, err, rcvbuf = BUF_TOTAL;
+
+	/* Keep for-loop so we can change NR_SOCKETS easily. */
+	for (i = 0; i < len; i += 2) {
+		int *pair = &sk[i];
+
+		pair[0] = start_server(test_case->family, test_case->type, NULL, 0, 0);
+		if (pair[0] < 0) {
+			ASSERT_GE(pair[0], 0, "start_server");
+			return pair[0];
+		}
+		pair[1] = connect_to_fd(pair[0], 0);
+		if (pair[1] < 0) {
+			ASSERT_GE(pair[1], 0, "connect_to_fd");
+			return pair[1];
+		}
+		err = connect_fd_to_fd(pair[0], pair[1], 0);
+		if (err) {
+			ASSERT_EQ(err, 0, "connect_fd_to_fd");
+			return err;
+		}
+		for (j = 0; j < 2; j++) {
+			err = setsockopt(pair[j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf));
+			if (err) {
+				ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)");
+				return err;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* Set *activated, open and close an AF_INET socket of the test type, return *memory_allocated. */
+static long get_memory_allocated(struct test_case *test_case,
+				 bool *activated, long *memory_allocated)
+{
+	int fd;
+
+	*activated = true;
+
+	/* AF_INET and AF_INET6 share the same memory_allocated.
+	 * tcp_init_sock() is called by AF_INET and AF_INET6,
+	 * but udp_lib_init_sock() is inline.
+	 */
+	fd = socket(AF_INET, test_case->type, 0);
+	if (!ASSERT_GE(fd, 0, "get_memory_allocated"))
+		return -1;
+
+	close(fd);
+	return *memory_allocated;
+}
+
+/* TCP variant: uses the tcp_activated / tcp_memory_allocated slots of skel->bss. */
+static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+	return get_memory_allocated(test_case, &skel->bss->tcp_activated,
+				    &skel->bss->tcp_memory_allocated);
+}
+
+/* UDP variant: uses the udp_activated / udp_memory_allocated slots of skel->bss. */
+static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+	return get_memory_allocated(test_case, &skel->bss->udp_activated,
+				    &skel->bss->udp_memory_allocated);
+}
+
+/* Send NR_SEND * BUF_SINGLE bytes per socket and check how get_memory_allocated() moved. */
+static int check_bypass(struct test_case *test_case,
+			struct sk_bypass_prot_mem *skel, bool bypass)
+{
+	char buf[BUF_SINGLE] = {};
+	long before, after;
+	int sk[NR_SOCKETS];
+	int err, i, j;
+
+	for (i = 0; i < ARRAY_SIZE(sk); i++)
+		sk[i] = -1;
+
+	err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk));
+	if (err)
+		goto close;
+
+	before = test_case->get_memory_allocated(test_case, skel);
+
+	/* allocate pages >= NR_PAGES */
+	for (i = 0; i < ARRAY_SIZE(sk); i++) {
+		for (j = 0; j < NR_SEND; j++) {
+			int bytes = send(sk[i], buf, sizeof(buf), 0);
+
+			/* Only assert on a short or failed send to keep the log quiet. */
+			if (bytes == sizeof(buf))
+				continue;
+			ASSERT_EQ(bytes, sizeof(buf), "send");
+			if (bytes < 0) {
+				err = bytes;
+				goto drain;
+			}
+		}
+	}
+
+	after = test_case->get_memory_allocated(test_case, skel);
+
+	if (bypass)
+		ASSERT_LE(after, before + 10, "bypass");
+	else
+		ASSERT_GT(after, before + NR_PAGES, "no bypass");
+
+drain:
+	if (test_case->type != SOCK_DGRAM)
+		goto close;
+	/* UDP starts purging sk->sk_receive_queue after one RCU
+	 * grace period, then udp_memory_allocated goes down,
+	 * so drain the queue before close().
+	 */
+	for (i = 0; i < ARRAY_SIZE(sk); i++) {
+		for (j = 0; j < NR_SEND; j++) {
+			int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC);
+
+			if (bytes == sizeof(buf))
+				continue;
+			if (bytes != -1 || errno != EAGAIN)
+				PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno));
+			break;
+		}
+	}
+
+close:
+	for (i = 0; i < ARRAY_SIZE(sk); i++) {
+		if (sk[i] < 0)
+			break;
+		close(sk[i]);
+	}
+
+	return err;
+}
+
+/*
+ * After joining a cgroup and opening a new netns, run check_bypass() three
+ * times: with defaults (no bypass), with the bypass_prot_mem sysctl set to 1,
+ * and with the sysctl back at 0 but sock_create attached to the cgroup.
+ */
+static void run_test(struct test_case *test_case)
+{
+	static const char sysctl_path[] = "/proc/sys/net/core/bypass_prot_mem";
+	struct sk_bypass_prot_mem *skel;
+	struct nstoken *nstoken;
+	int cgroup, err;
+
+	skel = sk_bypass_prot_mem__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	skel->bss->nr_cpus = libbpf_num_possible_cpus();
+
+	err = sk_bypass_prot_mem__attach(skel);
+	if (!ASSERT_OK(err, "attach"))
+		goto destroy_skel;
+
+	cgroup = test__join_cgroup("/sk_bypass_prot_mem");
+	if (!ASSERT_GE(cgroup, 0, "join_cgroup"))
+		goto destroy_skel;
+
+	if (!ASSERT_EQ(make_netns("sk_bypass_prot_mem"), 0, "make_netns"))
+		goto close_cgroup;
+
+	nstoken = open_netns("sk_bypass_prot_mem");
+	if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+		goto remove_netns;
+
+	if (!ASSERT_EQ(check_bypass(test_case, skel, false), 0, "test_bypass(false)"))
+		goto close_netns;
+
+	if (!ASSERT_EQ(write_sysctl(sysctl_path, "1"), 0, "write_sysctl(1)"))
+		goto close_netns;
+
+	if (!ASSERT_EQ(check_bypass(test_case, skel, true), 0, "test_bypass(true by sysctl)"))
+		goto close_netns;
+
+	if (!ASSERT_EQ(write_sysctl(sysctl_path, "0"), 0, "write_sysctl(0)"))
+		goto close_netns;
+
+	skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup);
+	if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)"))
+		goto close_netns;
+
+	ASSERT_EQ(check_bypass(test_case, skel, true), 0, "test_bypass(true by bpf)");
+
+close_netns:
+	close_netns(nstoken);
+remove_netns:
+	remove_netns("sk_bypass_prot_mem");
+close_cgroup:
+	close(cgroup);
+destroy_skel:
+	sk_bypass_prot_mem__destroy(skel);
+}
+
+static struct test_case test_cases[] = {	/* one subtest per entry, see serial_test_sk_bypass_prot_mem() */
+	{
+		.name = "TCP  ",	/* space-padded to the length of "TCPv6" */
+		.family = AF_INET,
+		.type = SOCK_STREAM,
+		.create_sockets = tcp_create_sockets,
+		.get_memory_allocated = tcp_get_memory_allocated,
+	},
+	{
+		.name = "UDP  ",	/* space-padded to the length of "UDPv6" */
+		.family = AF_INET,
+		.type = SOCK_DGRAM,
+		.create_sockets = udp_create_sockets,
+		.get_memory_allocated = udp_get_memory_allocated,
+	},
+	{
+		.name = "TCPv6",
+		.family = AF_INET6,
+		.type = SOCK_STREAM,
+		.create_sockets = tcp_create_sockets,
+		.get_memory_allocated = tcp_get_memory_allocated,	/* same helper as the v4 case */
+	},
+	{
+		.name = "UDPv6",
+		.family = AF_INET6,
+		.type = SOCK_DGRAM,
+		.create_sockets = udp_create_sockets,
+		.get_memory_allocated = udp_get_memory_allocated,	/* same helper as the v4 case */
+	},
+};
+
+/* Entry point: one subtest per test_cases[] entry. */
+void serial_test_sk_bypass_prot_mem(void)
+{
+	struct test_case *tc;
+
+	for (tc = test_cases; tc < test_cases + ARRAY_SIZE(test_cases); tc++)
+		if (test__start_subtest(tc->name))
+			run_test(tc);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
new file mode 100644
index 000000000000..c9efdd2a5b18
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "stacktrace_ips.skel.h"
+
+#ifdef __x86_64__
+/*
+ * Look up the stack stored under @key in the map @fd and check that each of
+ * its first @cnt ips resolves to the symbol address passed at the same vararg
+ * position (unsigned long). Returns 0, or non-zero on setup/lookup failure.
+ */
+static int check_stacktrace_ips(int fd, __u32 key, int cnt, ...)
+{
+	__u64 ips[PERF_MAX_STACK_DEPTH];
+	struct ksyms *ksyms = NULL;
+	int i, err = 0;
+	va_list args;
+
+	/* unlikely, but... (checked first so there is nothing to free yet) */
+	if (!ASSERT_LT(cnt, PERF_MAX_STACK_DEPTH, "check_max"))
+		return -1;
+
+	/* sorted by addr */
+	ksyms = load_kallsyms_local();
+	if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local"))
+		return -1;
+
+	/* Callers ignore the return value, so a failed lookup must assert here. */
+	err = bpf_map_lookup_elem(fd, &key, ips);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		goto out;
+
+	/* Compare each stacktrace ip's symbol with the expected address. */
+	va_start(args, cnt);
+	for (i = 0; i < cnt; i++) {
+		unsigned long val;
+		struct ksym *ksym;
+
+		val = va_arg(args, unsigned long);
+		ksym = ksym_search_local(ksyms, ips[i]);
+		if (!ASSERT_OK_PTR(ksym, "ksym_search_local"))
+			break;
+		ASSERT_EQ(ksym->addr, val, "stack_cmp");
+	}
+	va_end(args);
+out:
+	free_kallsyms_local(ksyms);
+	return err;
+}
+
+/* Attach a k(ret)probe_multi to bpf_testmod_stacktrace_test and check the recorded stack. */
+static void test_stacktrace_ips_kprobe_multi(bool retprobe)
+{
+	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
+		.retprobe = retprobe
+	);
+	struct stacktrace_ips *skel;
+
+	skel = stacktrace_ips__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+		return;
+
+	if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+		test__skip();
+		goto cleanup;
+	}
+
+	skel->links.kprobe_multi_test = bpf_program__attach_kprobe_multi_opts(
+							skel->progs.kprobe_multi_test,
+							"bpf_testmod_stacktrace_test", &opts);
+	if (!ASSERT_OK_PTR(skel->links.kprobe_multi_test, "bpf_program__attach_kprobe_multi_opts"))
+		goto cleanup;
+
+	trigger_module_test_read(1);
+
+	load_kallsyms();
+
+	check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+			     ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+			     ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+			     ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+			     ksym_get_addr("bpf_testmod_test_read"));
+
+cleanup:
+	stacktrace_ips__destroy(skel);
+}
+
+/* Attach rawtp_test to bpf_testmod_test_read and check the two innermost stack entries. */
+static void test_stacktrace_ips_raw_tp(void)
+{
+	__u32 info_len = sizeof(struct bpf_prog_info);
+	struct bpf_prog_info info = {};
+	struct stacktrace_ips *skel;
+	__u64 bpf_prog_ksym = 0;
+	int err;
+
+	skel = stacktrace_ips__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+		return;
+
+	if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+		test__skip();
+		goto cleanup;
+	}
+
+	skel->links.rawtp_test = bpf_program__attach_raw_tracepoint(
+							skel->progs.rawtp_test,
+							"bpf_testmod_test_read");
+	if (!ASSERT_OK_PTR(skel->links.rawtp_test, "bpf_program__attach_raw_tracepoint"))
+		goto cleanup;
+
+	/* get bpf program address */
+	info.jited_ksyms = ptr_to_u64(&bpf_prog_ksym);
+	info.nr_jited_ksyms = 1;
+	err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.rawtp_test),
+				      &info, &info_len);
+	if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+		goto cleanup;
+
+	trigger_module_test_read(1);
+
+	load_kallsyms();
+
+	check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 2,
+			     bpf_prog_ksym,
+			     ksym_get_addr("bpf_trace_run2"));
+
+cleanup:
+	stacktrace_ips__destroy(skel);
+}
+
+static void __test_stacktrace_ips(void)	/* variant built under #ifdef __x86_64__ */
+{
+	if (test__start_subtest("kprobe_multi"))
+		test_stacktrace_ips_kprobe_multi(false);	/* .retprobe = false */
+	if (test__start_subtest("kretprobe_multi"))
+		test_stacktrace_ips_kprobe_multi(true);		/* .retprobe = true */
+	if (test__start_subtest("raw_tp"))
+		test_stacktrace_ips_raw_tp();
+}
+#else
+static void __test_stacktrace_ips(void)
+{
+	test__skip();	/* the real subtests are only built for __x86_64__ */
+}
+#endif
+
+void test_stacktrace_ips(void)
+{
+	__test_stacktrace_ips();	/* subtests on x86_64, test__skip() stub elsewhere */
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
index 4d66fad3c8bd..0f3bf594e7a5 100644
--- a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
+++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
@@ -20,7 +20,9 @@ static const char * const test_cases[] = {
 	"strcspn_str",
 	"strcspn_reject",
 	"strstr",
+	"strcasestr",
 	"strnstr",
+	"strncasestr",
 };
 
 void run_too_long_tests(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
new file mode 100644
index 000000000000..de22734abc4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <linux/genetlink.h>
+#include "network_helpers.h"
+#include "bpf_smc.skel.h"
+
+#ifndef IPPROTO_SMC
+#define IPPROTO_SMC 256	/* fallback for headers that do not define it */
+#endif
+
+#define CLIENT_IP			"127.0.0.1"
+#define SERVER_IP			"127.0.1.0"	/* added to lo by setup_netns() */
+#define SERVER_IP_VIA_RISK_PATH	"127.0.2.0"	/* added to lo by setup_netns() */
+
+#define SERVICE_1	80	/* ports handed to run_link() in test_topo() */
+#define SERVICE_2	443
+#define SERVICE_3	8443
+
+#define TEST_NS	"bpf_smc_netns"
+
+static struct netns_obj *test_netns;	/* set by setup_netns(), released by cleanup_netns() */
+
+struct smc_policy_ip_key {
+	__u32  sip;	/* source address as returned by inet_addr() */
+	__u32  dip;	/* destination address as returned by inet_addr() */
+};
+
+struct smc_policy_ip_value {
+	__u8	mode;	/* block_link() stores 0, meaning "block" */
+};
+
+#if defined(__s390x__)
+/* s390x has a default SEID, so the UEID helpers are no-ops there. */
+static bool setup_ueid(void) { return true; }
+static void cleanup_ueid(void) {}
+#else
+enum {	/* genl commands passed as @op to smc_ueid(); presumably mirror the SMC uapi — TODO confirm */
+	SMC_NETLINK_ADD_UEID = 10,
+	SMC_NETLINK_REMOVE_UEID
+};
+
+enum {	/* attribute types; ENTRY is the one smc_ueid() sends */
+	SMC_NLA_EID_TABLE_UNSPEC,
+	SMC_NLA_EID_TABLE_ENTRY,    /* string */
+};
+
+struct msgtemplate {	/* buffer for one genl message, used for sending and receiving */
+	struct nlmsghdr n;
+	struct genlmsghdr g;
+	char buf[1024];
+};
+
+#define GENLMSG_DATA(glh)	((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))	/* first attribute after the genl header */
+#define GENLMSG_PAYLOAD(glh)	(NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
+#define NLA_DATA(na)		((void *)((char *)(na) + NLA_HDRLEN))	/* attribute payload */
+#define NLA_PAYLOAD(len)	((len) - NLA_HDRLEN)
+
+#define SMC_GENL_FAMILY_NAME	"SMC_GEN_NETLINK"
+#define SMC_BPFTEST_UEID	"SMC-BPFTEST-UEID"	/* space-padded to 32 bytes in smc_ueid() */
+
+static uint16_t smc_nl_family_id = -1;	/* filled in by get_smc_nl_family_id() */
+
+/*
+ * Send one generic-netlink request carrying a single attribute of @nla_len
+ * bytes (plus one zero byte from the cleared buffer) to a zeroed AF_NETLINK
+ * address. Returns 0 once all was sent, -1 on a sendto() error but EAGAIN.
+ */
+static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid,
+		    __u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type,
+		    void *nla_data, int nla_len)
+{
+	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+	struct msgtemplate msg = {
+		.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN),
+		.n.nlmsg_type = nlmsg_type,
+		.n.nlmsg_flags = nlmsg_flags,
+		.n.nlmsg_pid = nlmsg_pid,
+		.g.cmd = genl_cmd,
+		.g.version = 1,
+	};
+	struct nlattr *na = (struct nlattr *)GENLMSG_DATA(&msg);
+	char *buf = (char *)&msg;
+	int r, buflen;
+
+	na->nla_type = nla_type;
+	na->nla_len = nla_len + 1 + NLA_HDRLEN;
+	memcpy(NLA_DATA(na), nla_data, nla_len);
+	msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
+	buflen = msg.n.nlmsg_len;
+
+	for (;;) {
+		r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr, sizeof(nladdr));
+		if (r >= buflen)
+			return 0;
+		if (r > 0) {
+			buf += r;
+			buflen -= r;
+		} else if (errno != EAGAIN) {
+			return -1;
+		}
+	}
+}
+
+/* Resolve SMC_GENL_FAMILY_NAME to its genl family id (smc_nl_family_id); true on success. */
+static bool get_smc_nl_family_id(void)
+{
+	struct sockaddr_nl nl_src;
+	struct msgtemplate msg;
+	struct nlattr *nl;
+	int fd, ret;
+	pid_t pid;
+
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+	if (!ASSERT_OK_FD(fd, "nl_family socket"))
+		return false;
+
+	pid = getpid();
+	memset(&nl_src, 0, sizeof(nl_src));
+	nl_src.nl_family = AF_NETLINK;
+	nl_src.nl_pid = pid;
+	ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
+	if (!ASSERT_OK(ret, "nl_family bind"))
+		goto fail;
+
+	ret = send_cmd(fd, GENL_ID_CTRL, pid,
+		       NLM_F_REQUEST, CTRL_CMD_GETFAMILY,
+		       CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME,
+		       strlen(SMC_GENL_FAMILY_NAME));
+	if (!ASSERT_OK(ret, "nl_family query"))
+		goto fail;
+
+	/* Look at msg only once recv() is known to have filled it in. */
+	ret = recv(fd, &msg, sizeof(msg), 0);
+	if (!ASSERT_FALSE(ret < 0 || !NLMSG_OK(&msg.n, ret) ||
+			  msg.n.nlmsg_type == NLMSG_ERROR, "nl_family response"))
+		goto fail;
+
+	/* The family id is expected in the second attribute of the reply. */
+	nl = (struct nlattr *)GENLMSG_DATA(&msg);
+	nl = (struct nlattr *)((char *)nl + NLA_ALIGN(nl->nla_len));
+	if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type"))
+		goto fail;
+	smc_nl_family_id = *(uint16_t *)NLA_DATA(nl);
+	close(fd);
+	return true;
+fail:
+	close(fd);
+	return false;
+}
+
+/*
+ * Send UEID command @op for the space-padded SMC_BPFTEST_UEID and check the
+ * netlink reply. Returns true unless a step or the acked error code fails.
+ */
+static bool smc_ueid(int op)
+{
+	struct sockaddr_nl nl_src;
+	struct msgtemplate msg;
+	struct nlmsgerr *err;
+	char test_ueid[32];
+	bool ok = false;
+	int fd, ret;
+	pid_t pid;
+
+	/* UEID required */
+	memset(test_ueid, '\x20', sizeof(test_ueid));
+	memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID));
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+	if (!ASSERT_OK_FD(fd, "ueid socket"))
+		return false;
+
+	pid = getpid();
+	memset(&nl_src, 0, sizeof(nl_src));
+	nl_src.nl_family = AF_NETLINK;
+	nl_src.nl_pid = pid;
+
+	ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
+	if (!ASSERT_OK(ret, "ueid bind"))
+		goto out;
+
+	ret = send_cmd(fd, smc_nl_family_id, pid,
+		       NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY,
+		       (void *)test_ueid, sizeof(test_ueid));
+	if (!ASSERT_OK(ret, "ueid cmd"))
+		goto out;
+
+	ret = recv(fd, &msg, sizeof(msg), 0);
+	if (!ASSERT_FALSE(ret < 0 ||
+			  !NLMSG_OK(&msg.n, ret), "ueid response"))
+		goto out;
+
+	if (msg.n.nlmsg_type == NLMSG_ERROR) {
+		err = NLMSG_DATA(&msg);
+		if (op == SMC_NETLINK_REMOVE_UEID) {
+			/* Removing a UEID that is not there is fine. */
+			if (!ASSERT_FALSE((err->error && err->error != -ENOENT),
+					  "ueid remove"))
+				goto out;
+		} else if (op == SMC_NETLINK_ADD_UEID) {
+			if (!ASSERT_OK(err->error, "ueid add"))
+				goto out;
+		}
+	}
+	ok = true;
+out:
+	close(fd);
+	return ok;
+}
+
+/* Resolve the SMC genl family id, then (re)install the test UEID; true on success. */
+static bool setup_ueid(void)
+{
+	if (!get_smc_nl_family_id())
+		return false;
+	/* clear an old UEID left by a previous run; the return value is ignored */
+	smc_ueid(SMC_NETLINK_REMOVE_UEID);
+	/* smc-loopback requires a UEID */
+	return smc_ueid(SMC_NETLINK_ADD_UEID);
+}
+
+static void cleanup_ueid(void)
+{
+	smc_ueid(SMC_NETLINK_REMOVE_UEID);	/* undo setup_ueid(); return value ignored */
+}
+#endif /* __s390x__ */
+
+/* Create the TEST_NS namespace and add the two extra server addresses to lo. */
+static bool setup_netns(void)
+{
+	test_netns = netns_new(TEST_NS, true);
+	if (!ASSERT_OK_PTR(test_netns, "open net namespace"))
+		return false;
+
+	SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo");
+	SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo");
+	return true;
+
+fail_ip:
+	netns_free(test_netns);
+	return false;
+}
+
+static void cleanup_netns(void)
+{
+	netns_free(test_netns);	/* releases the object created in setup_netns() */
+}
+
+/* Prepare UEID and netns; the UEID is rolled back if the netns setup fails. */
+static bool setup_smc(void)
+{
+	if (!setup_ueid())
+		return false;
+
+	if (setup_netns())
+		return true;
+
+	/* netns setup failed: undo the UEID registration */
+	cleanup_ueid();
+	return false;
+}
+
+/* post_socket_cb: bind @fd to the IPv4 address string in @opts; 0 on success, 1 on failure. */
+static int set_client_addr_cb(int fd, void *opts)
+{
+	struct sockaddr_in addr = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = inet_addr((const char *)opts),
+	};
+
+	return !ASSERT_OK(bind(fd, (struct sockaddr *)&addr, sizeof(addr)), "client bind");
+}
+
+/* Start a SOCK_STREAM server on @dst:@port, connect to it from @src, close both ends. */
+static void run_link(const char *src, const char *dst, int port)
+{
+	struct network_helper_opts opts = {
+		.proto = IPPROTO_TCP,
+		.post_socket_cb = set_client_addr_cb,
+		.cb_opts = (void *)src,
+	};
+	int server, client;
+
+	server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL);
+	if (!ASSERT_OK_FD(server, "start service_1"))
+		return;
+
+	/* set_client_addr_cb() binds the client socket to @src. */
+	client = connect_to_fd_opts(server, &opts);
+	if (ASSERT_OK_FD(client, "start connect"))
+		close(client);
+
+	close(server);
+}
+
+/* Insert a (src, dst) -> mode 0 ("block") entry into the policy map @map_fd. */
+static void block_link(int map_fd, const char *src, const char *dst)
+{
+	struct smc_policy_ip_value val = { .mode = /* block */ 0 };
+	struct smc_policy_ip_key key = { .sip = inet_addr(src),
+					 .dip = inet_addr(dst) };
+
+	/* Report a failed update instead of silently running without the entry. */
+	ASSERT_OK(bpf_map_update_elem(map_fd, &key, &val, BPF_ANY), "block_link update");
+}
+
+/*
+ * This test describes a real-life service topology as follows:
+ *
+ *                             +-------------> service_1
+ *            link 1           |                     |
+ *   +--------------------> server                   |  link 2
+ *   |                         |                     V
+ *   |                         +-------------> service_2
+ *   |        link 3
+ *  client -------------------> server_via_unsafe_path -> service_3
+ *
+ * Among them,
+ * 1. link-1 is very suitable for using SMC.
+ * 2. link-2 is not suitable for using SMC, because the mode of this link is
+ *    kind of short-link services.
+ * 3. link-3 is also not suitable for using SMC, because the RDMA link is
+ *    unavailable and needs to go through a long timeout before it can fallback
+ *    to TCP.
+ * To achieve this goal, we use a customized SMC ip strategy via smc_hs_ctrl.
+ */
+static void test_topo(void)
+{
+	struct bpf_smc *skel;
+	int rc, map_fd;
+
+	skel = bpf_smc__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load"))
+		return;
+
+	rc = bpf_smc__attach(skel);
+	if (!ASSERT_OK(rc, "bpf_smc__attach"))
+		goto fail;
+
+	map_fd = bpf_map__fd(skel->maps.smc_policy_ip);
+	if (!ASSERT_OK_FD(map_fd, "bpf_map__fd"))
+		goto fail;
+
+	/* Mock the process of transparent replacement, since we will modify
+	 * protocol to ipproto_smc according to it via
+	 * fmod_ret/update_socket_protocol.
+	 * A failed sysctl write is reported and ends the test early.
+	 */
+	rc = write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck");
+	if (!ASSERT_OK(rc, "write hs_ctrl sysctl"))
+		goto fail;
+
+	/* Configure ip strategy */
+	block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH);
+	block_link(map_fd, SERVER_IP, SERVER_IP);
+
+	/* should go with smc */
+	run_link(CLIENT_IP, SERVER_IP, SERVICE_1);
+	/* should go with smc fallback */
+	run_link(SERVER_IP, SERVER_IP, SERVICE_2);
+	ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count");
+	ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+	/* should go with smc */
+	run_link(CLIENT_IP, SERVER_IP, SERVICE_2);
+	ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count");
+	ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+	/* should go with smc fallback */
+	run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3);
+	ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count");
+	ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count");
+
+fail:
+	bpf_smc__destroy(skel);
+}
+
+/* Entry point: skip when the SMC environment cannot be set up, else run "topo". */
+void test_bpf_smc(void)
+{
+	if (setup_smc()) {
+		if (test__start_subtest("topo"))
+			test_topo();
+		cleanup_ueid();
+		cleanup_netns();
+		return;
+	}
+
+	printf("setup for smc test failed, test SKIP:\n");
+	test__skip();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index 2a27f3714f5c..bdc4fc06bc5a 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -139,7 +139,7 @@ static void test_lsm_tailcall(void)
 	if (CHECK_FAIL(!err))
 		goto close_prog;
 
-	prog_fd = bpf_program__fd(skel->progs.lsm_file_alloc_security_prog);
+	prog_fd = bpf_program__fd(skel->progs.lsm_kernfs_init_security_prog);
 	if (CHECK_FAIL(prog_fd < 0))
 		goto close_prog;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c
new file mode 100644
index 000000000000..462512fb191f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * BPF-based flow shaping
+ *
+ * The test brings up two veth in two isolated namespaces, attach some flow
+ * shaping program onto it, and ensures that a manual speedtest maximum
+ * value matches the rate set in the BPF shapers.
+ */
+
+#include <asm-generic/socket.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <bpf/libbpf.h>
+#include <pthread.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_edt.skel.h"
+
+#define SERVER_NS "tc-edt-server-ns"
+#define CLIENT_NS "tc-edt-client-ns"
+#define IP4_ADDR_VETH1 "192.168.1.1"
+#define IP4_ADDR_VETH2 "192.168.1.2"
+#define IP4_ADDR_VETH2_HEX 0xC0A80102 /* 192.168.1.2; not referenced elsewhere in this file */
+
+#define TIMEOUT_MS		2000 /* passed to start_server() */
+#define TEST_PORT		9000 /* port given to start_server() */
+#define TARGET_RATE_MBPS	5.0 /* times 1000 * 1000 gives skel->bss->target_rate */
+#define TX_BYTES_COUNT		(1 * 1000 * 1000) /* bytes handed to send_recv_data() */
+#define RATE_ERROR_PERCENT	2.0 /* largest accepted deviation, in percent */
+
+struct connection { /* not referenced anywhere else in this file */
+	int server_listen_fd;
+	int server_conn_fd;
+	int client_conn_fd;
+};
+
+/* Create both netns and the veth pair, put fq and the shaper program on veth2
+ * egress, and store the target rate in bss. Returns 0, or -1 after unwinding.
+ */
+static int setup(struct test_tc_edt *skel)
+{
+	struct nstoken *client_tok, *server_tok;
+
+	if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns"))
+		return -1;
+	if (!ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
+		goto drop_client_ns;
+
+	client_tok = open_netns(CLIENT_NS);
+	if (!ASSERT_OK_PTR(client_tok, "open client ns"))
+		goto drop_server_ns;
+	SYS(leave_client_ns, "ip link add veth1 type veth peer name %s",
+	    "veth2 netns " SERVER_NS);
+	SYS(leave_client_ns, "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
+	SYS(leave_client_ns, "ip link set veth1 up");
+
+	server_tok = open_netns(SERVER_NS);
+	if (!ASSERT_OK_PTR(server_tok, "enter server ns"))
+		goto leave_client_ns;
+	SYS(leave_server_ns, "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
+	SYS(leave_server_ns, "ip link set veth2 up");
+	SYS(leave_server_ns, "tc qdisc add dev veth2 root fq");
+	if (!ASSERT_OK(tc_prog_attach("veth2", -1, bpf_program__fd(skel->progs.tc_prog)),
+		       "attach bpf prog"))
+		goto leave_server_ns;
+	skel->bss->target_rate = TARGET_RATE_MBPS * 1000 * 1000;
+	close_netns(server_tok);
+	close_netns(client_tok);
+	return 0;
+
+leave_server_ns:
+	close_netns(server_tok);
+leave_client_ns:
+	close_netns(client_tok);
+drop_server_ns:
+	remove_netns(SERVER_NS);
+drop_client_ns:
+	remove_netns(CLIENT_NS);
+	return -1;
+}
+
+static void cleanup(void)
+{
+	remove_netns(CLIENT_NS); /* both namespaces were created by setup() */
+	remove_netns(SERVER_NS);
+}
+
+/* Time one send_recv_data() transfer and compare its rate with the target. */
+static void run_test(void)
+{
+	int server_fd, client_fd, err;
+	double rate_mbps, rate_error;
+	struct nstoken *nstoken;
+	__u64 ts_start, ts_end;
+
+	nstoken = open_netns(SERVER_NS);
+	if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+		return;
+	server_fd = start_server(AF_INET, SOCK_STREAM, IP4_ADDR_VETH2, TEST_PORT, TIMEOUT_MS);
+	close_netns(nstoken); /* leave the server ns whether or not the server started */
+	if (!ASSERT_OK_FD(server_fd, "start server"))
+		return;
+
+	nstoken = open_netns(CLIENT_NS);
+	if (!ASSERT_OK_PTR(nstoken, "open client ns"))
+		goto close_server;
+	client_fd = connect_to_fd(server_fd, 0);
+	if (!ASSERT_OK_FD(client_fd, "connect client"))
+		goto leave_client_ns;
+
+	ts_start = get_time_ns();
+	err = send_recv_data(server_fd, client_fd, TX_BYTES_COUNT);
+	ts_end = get_time_ns();
+	ASSERT_OK(err, "send_recv_data");
+	rate_mbps = TX_BYTES_COUNT / ((ts_end - ts_start) / 1000.0); /* bytes per us */
+	rate_error = fabs((rate_mbps - TARGET_RATE_MBPS) * 100.0 / TARGET_RATE_MBPS);
+	ASSERT_LE(rate_error, RATE_ERROR_PERCENT, "rate error is lower than threshold");
+	close(client_fd);
+leave_client_ns:
+	close_netns(nstoken);
+close_server:
+	close(server_fd);
+}
+
+/* Test entry point: load the skeleton, build the topology, run the rate check. */
+void test_tc_edt(void)
+{
+	struct test_tc_edt *skel;
+
+	skel = test_tc_edt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel open and load"))
+		return;
+
+	/* setup() unwinds its own netns on failure; only the skeleton is left. */
+	if (ASSERT_OK(setup(skel), "global setup")) {
+		run_test();
+		cleanup();
+	}
+	test_tc_edt__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
new file mode 100644
index 000000000000..0fe0a8f62486
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
@@ -0,0 +1,714 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * End-to-end eBPF tunnel test suite
+ *   The file tests BPF network tunnels implementation. For each tunnel
+ *   type, the test validates that:
+ *   - basic communication can first be established between the two veths
+ *   - when adding a BPF-based encapsulation on client egress, it now fails
+ *   to communicate with the server
+ *   - when adding a kernel-based decapsulation on server ingress, client
+ *   can now connect
+ *   - when replacing the kernel-based decapsulation with a BPF-based one,
+ *   the client can still connect
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_tunnel.skel.h"
+
+#define SERVER_NS	"tc-tunnel-server-ns"
+#define CLIENT_NS	"tc-tunnel-client-ns"
+#define MAC_ADDR_VETH1	"00:11:22:33:44:55"
+#define IP4_ADDR_VETH1	"192.168.1.1"
+#define IP6_ADDR_VETH1	"fd::1"
+#define MAC_ADDR_VETH2	"66:77:88:99:AA:BB" /* also given to testtun0 when a subtest asks for it */
+#define IP4_ADDR_VETH2	"192.168.1.2"
+#define IP6_ADDR_VETH2	"fd::2"
+
+#define TEST_NAME_MAX_LEN	64 /* size of subtest_cfg.name */
+#define PROG_NAME_MAX_LEN	64 /* size of the "__encap_..." lookup buffer */
+#define TUNNEL_ARGS_MAX_LEN	128 /* size of subtest_cfg.extra_decap_mod_args */
+#define BUFFER_LEN		2000 /* size of tx_buffer and rx_buffer */
+#define DEFAULT_TEST_DATA_SIZE	100 /* bytes sent, then read back, on each connection */
+#define GSO_TEST_DATA_SIZE	BUFFER_LEN /* second, larger send when test_gso is set */
+
+#define TIMEOUT_MS			1000 /* server socket timeout option */
+#define TEST_PORT			8000 /* port the server listens on */
+#define UDP_PORT			5555 /* FOU port for non-MPLS subtests */
+#define MPLS_UDP_PORT			6635 /* FOU port when mac_tun_type is "mpls" */
+#define FOU_MPLS_PROTO			137 /* "ip fou ... ipproto" value for MPLS */
+#define VXLAN_ID			1 /* "id" in the vxlan link arguments */
+#define VXLAN_PORT			8472 /* "dstport" in the vxlan link arguments */
+#define MPLS_TABLE_ENTRIES_COUNT	65536 /* written to net.mpls.platform_labels */
+
+static char tx_buffer[BUFFER_LEN], rx_buffer[BUFFER_LEN]; /* tx is filled once in setup() */
+
+struct subtest_cfg {
+	char *ebpf_tun_type; /* first half of the subtest and encap program names */
+	char *iproute_tun_type; /* "type" argument of "ip link add" for testtun0 */
+	char *mac_tun_type; /* second half of the names; "mpls" selects the MPLS FOU settings */
+	int ipproto; /* 6 selects the IPv6 addresses and AF_INET6, anything else IPv4 */
+	void (*extra_decap_mod_args_cb)(struct subtest_cfg *cfg, char *dst);
+	bool tunnel_need_veth_mac; /* give testtun0 the MAC_ADDR_VETH2 address */
+	bool configure_fou_rx_port; /* add/remove a FOU RX port around kernel decap */
+	char *tmode; /* optional extra "ip link add" arguments, may be NULL */
+	bool expect_kern_decap_failure; /* skip the kernel decapsulation step */
+	bool configure_mpls; /* run configure_kernel_for_mpls() on the server */
+	bool test_gso; /* also send GSO_TEST_DATA_SIZE bytes */
+	char *tunnel_client_addr; /* "remote" of testtun0; derived from ipproto if unset */
+	char *tunnel_server_addr; /* "local" of testtun0; derived from ipproto if unset */
+	char name[TEST_NAME_MAX_LEN]; /* filled by build_subtest_name() */
+	char *server_addr; /* address the server binds and the client connects to */
+	int client_egress_prog_fd; /* encap program, attached on veth1 egress */
+	int server_ingress_prog_fd; /* decap_f program, attached on veth2 ingress */
+	char extra_decap_mod_args[TUNNEL_ARGS_MAX_LEN]; /* filled by the callback above */
+	int server_fd; /* listening socket opened by run_server() */
+};
+
+struct connection {
+	int client_fd; /* socket returned by connect_to_addr_str() */
+	int server_fd; /* socket returned by accept() on the listening fd */
+};
+
+static int build_subtest_name(struct subtest_cfg *cfg, char *dst, size_t size)
+{
+	int ret;
+
+	ret = snprintf(dst, size, "%s_%s", cfg->ebpf_tun_type,
+		       cfg->mac_tun_type);
+
+	return ret < 0 ? ret : 0;
+}
+
+/* Resolve the "__encap_<subtest name>" and decap_f program fds into cfg. */
+static int set_subtest_progs(struct subtest_cfg *cfg, struct test_tc_tunnel *skel)
+{
+	char prog_name[PROG_NAME_MAX_LEN];
+	struct bpf_program *encap;
+	int off, err;
+
+	off = snprintf(prog_name, PROG_NAME_MAX_LEN, "__encap_");
+	if (off < 0)
+		return off;
+	err = build_subtest_name(cfg, prog_name + off, PROG_NAME_MAX_LEN - off);
+	if (err < 0)
+		return err;
+	encap = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!encap)
+		return -1;
+	cfg->client_egress_prog_fd = bpf_program__fd(encap);
+	cfg->server_ingress_prog_fd = bpf_program__fd(skel->progs.decap_f);
+	return 0;
+}
+
+/* Derive the server address from cfg->ipproto, and the tunnel endpoint
+ * addresses too unless the configuration table already provides both.
+ * An ipproto of 6 selects the IPv6 addresses, any other value the IPv4 ones.
+ */
+static void set_subtest_addresses(struct subtest_cfg *cfg)
+{
+	bool is_v6 = cfg->ipproto == 6;
+
+	cfg->server_addr = is_v6 ? IP6_ADDR_VETH2 : IP4_ADDR_VETH2;
+
+	/* Entries such as ipip6 or sit tunnel over the other IP family and
+	 * therefore preset their endpoints in the table: leave those alone.
+	 */
+	if (cfg->tunnel_client_addr && cfg->tunnel_server_addr)
+		return;
+
+	/* Otherwise the tunnel endpoints use the same family as the payload,
+	 * with veth1 as the client end and veth2 as the server end.
+	 */
+	cfg->tunnel_client_addr = is_v6 ? IP6_ADDR_VETH1 : IP4_ADDR_VETH1;
+	cfg->tunnel_server_addr = is_v6 ? IP6_ADDR_VETH2 : IP4_ADDR_VETH2;
+}
+
+/* Start a SOCK_STREAM server on cfg->server_addr inside SERVER_NS and keep
+ * its fd in cfg->server_fd. Returns 0 on success, -1 on failure.
+ */
+static int run_server(struct subtest_cfg *cfg)
+{
+	struct network_helper_opts opts = { .timeout_ms = TIMEOUT_MS };
+	struct nstoken *server_ns = open_netns(SERVER_NS);
+	int family;
+
+	if (!ASSERT_OK_PTR(server_ns, "open server ns"))
+		return -1;
+
+	family = cfg->ipproto == 6 ? AF_INET6 : AF_INET;
+	cfg->server_fd = start_server_str(family, SOCK_STREAM, cfg->server_addr,
+					  TEST_PORT, &opts);
+	close_netns(server_ns);
+	if (!ASSERT_OK_FD(cfg->server_fd, "start server"))
+		return -1;
+	return 0;
+}
+
+/* recv() up to len bytes on the accepted socket and compare them with the
+ * start of tx_buffer. Returns 0 on match, 1 if the length or data differs.
+ */
+static int check_server_rx_data(struct subtest_cfg *cfg,
+				struct connection *conn, int len)
+{
+	int nread;
+
+	memset(rx_buffer, 0, BUFFER_LEN);
+	nread = recv(conn->server_fd, rx_buffer, len, 0);
+	return !ASSERT_EQ(nread, len, "check rx data len") ||
+	       !ASSERT_MEMEQ(tx_buffer, rx_buffer, len, "check received data");
+}
+
+/* Connect to the subtest server from the current netns and accept the peer
+ * socket on cfg->server_fd. Returns a malloc()ed connection holding both
+ * fds, or NULL on any failure (nothing is left open in that case).
+ */
+static struct connection *connect_client_to_server(struct subtest_cfg *cfg)
+{
+	struct network_helper_opts opts = {.timeout_ms = 500};
+	int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET;
+	struct connection *conn;
+
+	conn = malloc(sizeof(struct connection));
+	if (!conn)
+		return NULL;
+
+	conn->client_fd = connect_to_addr_str(family, SOCK_STREAM,
+					      cfg->server_addr, TEST_PORT, &opts);
+	if (conn->client_fd < 0)
+		goto free_conn;
+
+	conn->server_fd = accept(cfg->server_fd, NULL, NULL);
+	if (conn->server_fd < 0)
+		goto close_client;
+
+	return conn;
+
+close_client:
+	close(conn->client_fd);
+free_conn:
+	free(conn);
+	return NULL;
+}
+
+static void disconnect_client_from_server(struct subtest_cfg *cfg,
+					  struct connection *conn)
+{
+	close(conn->server_fd); /* conn is dereferenced, so it must not be NULL */
+	close(conn->client_fd);
+	free(conn); /* allocated by connect_client_to_server() */
+}
+
+/* Open a connection and, when must_succeed is set, push test data through it
+ * and verify what the server side receives. With must_succeed clear, the
+ * connection attempt itself is expected to fail. Returns 0 on the expected
+ * outcome, -1 otherwise.
+ */
+static int send_and_test_data(struct subtest_cfg *cfg, bool must_succeed)
+{
+	struct connection *conn = connect_client_to_server(cfg);
+	int sent, res = -1;
+
+	if (!must_succeed) {
+		if (ASSERT_ERR_PTR(conn, "connection that must fail"))
+			return 0;
+		goto end;
+	}
+	if (!ASSERT_OK_PTR(conn, "connection that must succeed"))
+		return -1;
+
+	sent = send(conn->client_fd, tx_buffer, DEFAULT_TEST_DATA_SIZE, 0);
+	if (!ASSERT_EQ(sent, DEFAULT_TEST_DATA_SIZE, "send data from client") ||
+	    check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
+		goto end;
+	/* Larger send; only its first DEFAULT_TEST_DATA_SIZE bytes are read back. */
+	if (cfg->test_gso) {
+		sent = send(conn->client_fd, tx_buffer, GSO_TEST_DATA_SIZE, 0);
+		if (!ASSERT_EQ(sent, GSO_TEST_DATA_SIZE,
+			       "send (large) data from client") ||
+		    check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
+			goto end;
+	}
+
+	res = 0;
+end:
+	disconnect_client_from_server(cfg, conn);
+	return res;
+}
+
+static void vxlan_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
+{
+	snprintf(dst, TUNNEL_ARGS_MAX_LEN, "id %d dstport %d udp6zerocsumrx",
+		 VXLAN_ID, VXLAN_PORT); /* cfg is unused; output is bounded to TUNNEL_ARGS_MAX_LEN */
+}
+
+/* Format the FOU encap arguments, using the MPLS port for "mpls" subtests. */
+static void udp_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
+{
+	int dport = strcmp(cfg->mac_tun_type, "mpls") ? UDP_PORT : MPLS_UDP_PORT;
+
+	snprintf(dst, TUNNEL_ARGS_MAX_LEN,
+		 "encap fou encap-sport auto encap-dport %d", dport);
+}
+
+/* Run "ip fou add" or "ip fou del" for the subtest's FOU port and protocol.
+ * Returns 0 on success, 1 if the command fails.
+ */
+static int configure_fou_rx_port(struct subtest_cfg *cfg, bool add)
+{
+	bool is_mpls = strcmp(cfg->mac_tun_type, "mpls") == 0;
+	bool is_v6 = cfg->ipproto == 6;
+	/* 41 and 4 are the IP protocol numbers of IPv6 and IPv4 encapsulation */
+	int fou_proto = is_mpls ? FOU_MPLS_PROTO : (is_v6 ? 41 : 4);
+
+	SYS(fail, "ip fou %s port %d ipproto %d%s", add ? "add" : "del",
+	    is_mpls ? MPLS_UDP_PORT : UDP_PORT, fou_proto,
+	    is_v6 ? " -6" : "");
+
+	return 0;
+fail:
+	return 1;
+}
+
+static int add_fou_rx_port(struct subtest_cfg *cfg)
+{
+	return configure_fou_rx_port(cfg, true); /* true selects "ip fou add" */
+}
+
+static int del_fou_rx_port(struct subtest_cfg *cfg)
+{
+	return configure_fou_rx_port(cfg, false); /* false selects "ip fou del" */
+}
+
+static int update_tunnel_intf_addr(struct subtest_cfg *cfg)
+{
+	SYS(fail, "ip link set dev testtun0 address " MAC_ADDR_VETH2); /* same MAC as veth2 */
+	return 0;
+fail:
+	return -1; /* reached when the command above fails */
+}
+
+static int configure_kernel_for_mpls(struct subtest_cfg *cfg)
+{
+	SYS(fail, "sysctl -qw net.mpls.platform_labels=%d",
+	    MPLS_TABLE_ENTRIES_COUNT);
+	SYS(fail, "ip -f mpls route add 1000 dev lo"); /* deleted in remove_kernel_decapsulation() */
+	SYS(fail, "ip link set lo up");
+	SYS(fail, "sysctl -qw net.mpls.conf.testtun0.input=1"); /* testtun0 must already exist */
+	SYS(fail, "sysctl -qw net.ipv4.conf.lo.rp_filter=0");
+	return 0;
+fail:
+	return -1; /* any failing command above lands here */
+}
+
+/* Attach the subtest's encapsulation program on veth1 egress (the ingress
+ * slot is left at -1). Runs in the caller's netns; returns the
+ * tc_prog_attach() result.
+ */
+static int configure_encapsulation(struct subtest_cfg *cfg)
+{
+	return tc_prog_attach("veth1", -1, cfg->client_egress_prog_fd);
+}
+
+/* Create testtun0 in SERVER_NS and bring it up, adding the FOU port, MAC
+ * override and MPLS setup the subtest requests. Returns 0, or -1 on failure.
+ */
+static int configure_kernel_decapsulation(struct subtest_cfg *cfg)
+{
+	struct nstoken *server_ns = open_netns(SERVER_NS);
+	int ret = -1;
+
+	if (!ASSERT_OK_PTR(server_ns, "open server ns"))
+		return -1;
+	if (cfg->configure_fou_rx_port &&
+	    !ASSERT_OK(add_fou_rx_port(cfg), "configure FOU RX port"))
+		goto out;
+	SYS(out, "ip link add name testtun0 type %s %s remote %s local %s %s",
+	    cfg->iproute_tun_type, cfg->tmode ? cfg->tmode : "",
+	    cfg->tunnel_client_addr, cfg->tunnel_server_addr,
+	    cfg->extra_decap_mod_args);
+	if (cfg->tunnel_need_veth_mac &&
+	    !ASSERT_OK(update_tunnel_intf_addr(cfg), "update testtun0 mac"))
+		goto out;
+	if (cfg->configure_mpls &&
+	    !ASSERT_OK(configure_kernel_for_mpls(cfg), "configure MPLS decap"))
+		goto out;
+	SYS(out, "sysctl -qw net.ipv4.conf.all.rp_filter=0");
+	SYS(out, "sysctl -qw net.ipv4.conf.testtun0.rp_filter=0");
+	SYS(out, "ip link set dev testtun0 up");
+	ret = 0;
+out:
+	close_netns(server_ns);
+	return ret;
+}
+
+static void remove_kernel_decapsulation(struct subtest_cfg *cfg)
+{
+	SYS_NOFAIL("ip link del testtun0"); /* runs in the caller's current netns */
+	if (cfg->configure_mpls)
+		SYS_NOFAIL("ip -f mpls route del 1000 dev lo");
+	if (cfg->configure_fou_rx_port)
+		del_fou_rx_port(cfg); /* return value is not checked */
+}
+
+/* Swap kernel decapsulation for the BPF one in SERVER_NS: delete testtun0
+ * (when the subtest created it) and attach the decap program on veth2
+ * ingress. Returns 0 on success, -1 on failure.
+ */
+static int configure_ebpf_decapsulation(struct subtest_cfg *cfg)
+{
+	struct nstoken *server_ns = open_netns(SERVER_NS);
+	int ret = -1;
+
+	if (!ASSERT_OK_PTR(server_ns, "open server ns"))
+		return -1;
+	if (!cfg->expect_kern_decap_failure)
+		SYS(out, "ip link del testtun0");
+	if (ASSERT_OK(tc_prog_attach("veth2", cfg->server_ingress_prog_fd, -1),
+		      "attach_program"))
+		ret = 0;
+out:
+	close_netns(server_ns);
+	return ret;
+}
+
+/* Drive one subtest: plain connectivity first, then BPF encapsulation on the
+ * client paired with kernel decapsulation (when the subtest supports it) and
+ * finally with BPF decapsulation. The server socket is closed on return.
+ */
+static void run_test(struct subtest_cfg *cfg)
+{
+	struct nstoken *client_ns;
+
+	if (!ASSERT_OK(run_server(cfg), "run server"))
+		return;
+
+	client_ns = open_netns(CLIENT_NS);
+	if (!ASSERT_OK_PTR(client_ns, "open client ns"))
+		goto out;
+
+	/* Without any tunnel in place, client and server must be able to talk */
+	if (!ASSERT_OK(send_and_test_data(cfg, true), "connect without any encap"))
+		goto out;
+
+	/* Client starts encapsulating its egress traffic */
+	if (!ASSERT_OK(configure_encapsulation(cfg), "configure encapsulation"))
+		goto out;
+
+	/* Kernel tunnel device as decapsulator, skipped where expected to fail */
+	if (!cfg->expect_kern_decap_failure &&
+	    (!ASSERT_OK(configure_kernel_decapsulation(cfg),
+			"configure kernel decapsulation") ||
+	     !ASSERT_OK(send_and_test_data(cfg, true),
+			"connect with encap prog and kern decap")))
+		goto out;
+
+	/* BPF program as decapsulator: the connection must still work */
+	if (ASSERT_OK(configure_ebpf_decapsulation(cfg), "configure ebpf decapsulation"))
+		ASSERT_OK(send_and_test_data(cfg, true), "connect with encap and decap progs");
+out:
+	close_netns(client_ns);
+	close(cfg->server_fd);
+}
+
+/* Fill tx_buffer with random bytes, then create both netns and the veth pair
+ * linking them. Returns 0, or -1 after deleting any netns created here.
+ */
+static int setup(void)
+{
+	struct nstoken *nstoken_client, *nstoken_server;
+	int fd, err;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (!ASSERT_OK_FD(fd, "open urandom"))
+		return -1;
+	err = read(fd, tx_buffer, BUFFER_LEN);
+	close(fd);
+	if (!ASSERT_EQ(err, BUFFER_LEN, "read random bytes"))
+		return -1;
+
+	if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns"))
+		return -1;
+	if (!ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
+		goto fail_delete_client_ns; /* the client ns already exists */
+	nstoken_client = open_netns(CLIENT_NS);
+	if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+		goto fail_delete_ns;
+	SYS(fail_close_ns_client, "ip link add %s type veth peer name %s",
+	    "veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1,
+	    "veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2);
+	SYS(fail_close_ns_client, "ethtool -K veth1 tso off");
+	SYS(fail_close_ns_client, "ip link set veth1 up");
+	nstoken_server = open_netns(SERVER_NS);
+	if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
+		goto fail_close_ns_client;
+	SYS(fail_close_ns_server, "ip link set veth2 up");
+	close_netns(nstoken_server);
+	close_netns(nstoken_client);
+	return 0;
+
+fail_close_ns_server:
+	close_netns(nstoken_server);
+fail_close_ns_client:
+	close_netns(nstoken_client);
+fail_delete_ns:
+	SYS_NOFAIL("ip netns del " SERVER_NS);
+fail_delete_client_ns:
+	SYS_NOFAIL("ip netns del " CLIENT_NS);
+	return -1;
+}
+
+/* Resolve the subtest's addresses, programs and extra tunnel arguments, then
+ * assign addresses and routes to veth1 and veth2. Returns 0, or -1 on failure.
+ */
+static int subtest_setup(struct test_tc_tunnel *skel, struct subtest_cfg *cfg)
+{
+	struct nstoken *client_ns, *server_ns;
+	int ret = -1;
+
+	set_subtest_addresses(cfg);
+	if (!ASSERT_OK(set_subtest_progs(cfg, skel), "find subtest progs"))
+		return -1;
+	if (cfg->extra_decap_mod_args_cb)
+		cfg->extra_decap_mod_args_cb(cfg, cfg->extra_decap_mod_args);
+
+	client_ns = open_netns(CLIENT_NS);
+	if (!ASSERT_OK_PTR(client_ns, "open client ns"))
+		return -1;
+	SYS(leave_client_ns,
+	    "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
+	SYS(leave_client_ns, "ip -4 route flush table main");
+	SYS(leave_client_ns,
+	    "ip -4 route add " IP4_ADDR_VETH2 " mtu 1450 dev veth1");
+	SYS(leave_client_ns,
+	    "ip -6 addr add " IP6_ADDR_VETH1 "/64 dev veth1 nodad");
+	SYS(leave_client_ns, "ip -6 route flush table main");
+	SYS(leave_client_ns,
+	    "ip -6 route add " IP6_ADDR_VETH2 " mtu 1430 dev veth1");
+
+	server_ns = open_netns(SERVER_NS);
+	if (!ASSERT_OK_PTR(server_ns, "open server ns"))
+		goto leave_client_ns;
+	SYS(leave_server_ns,
+	    "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
+	SYS(leave_server_ns,
+	    "ip -6 addr add " IP6_ADDR_VETH2 "/64 dev veth2 nodad");
+	ret = 0;
+leave_server_ns:
+	close_netns(server_ns);
+leave_client_ns:
+	close_netns(client_ns);
+	return ret;
+}
+
+
+/* Best-effort undo of subtest_setup() and run_test() in both namespaces. */
+static void subtest_cleanup(struct subtest_cfg *cfg)
+{
+	struct nstoken *nstoken = open_netns(CLIENT_NS);
+
+	if (ASSERT_OK_PTR(nstoken, "open client ns")) {
+		SYS_NOFAIL("tc qdisc delete dev veth1 parent ffff:fff1");
+		SYS_NOFAIL("ip a flush veth1");
+		close_netns(nstoken);
+	}
+	nstoken = open_netns(SERVER_NS);
+	if (ASSERT_OK_PTR(nstoken, "open server ns")) {
+		SYS_NOFAIL("tc qdisc delete dev veth2 parent ffff:fff1");
+		SYS_NOFAIL("ip a flush veth2");
+		if (!cfg->expect_kern_decap_failure)
+			remove_kernel_decapsulation(cfg);
+		close_netns(nstoken);
+	}
+}
+
+static void cleanup(void)
+{
+	remove_netns(CLIENT_NS); /* both namespaces were created by setup() */
+	remove_netns(SERVER_NS);
+}
+
+static struct subtest_cfg subtests_cfg[] = { /* one entry per subtest, named "<ebpf>_<mac>" */
+	{ /* ipip_none */
+		.ebpf_tun_type = "ipip",
+		.mac_tun_type = "none",
+		.iproute_tun_type = "ipip",
+		.ipproto = 4,
+	},
+	{ /* ipip6_none: IPv4 payload, tunnel endpoints preset to the IPv6 addresses */
+		.ebpf_tun_type = "ipip6",
+		.mac_tun_type = "none",
+		.iproute_tun_type = "ip6tnl",
+		.ipproto = 4,
+		.tunnel_client_addr = IP6_ADDR_VETH1,
+		.tunnel_server_addr = IP6_ADDR_VETH2,
+	},
+	{ /* ip6tnl_none */
+		.ebpf_tun_type = "ip6tnl",
+		.iproute_tun_type = "ip6tnl",
+		.mac_tun_type = "none",
+		.ipproto = 6,
+	},
+	{ /* sit_none: IPv6 payload, tunnel endpoints preset to the IPv4 addresses */
+		.mac_tun_type = "none",
+		.ebpf_tun_type = "sit",
+		.iproute_tun_type = "sit",
+		.ipproto = 6,
+		.tunnel_client_addr = IP4_ADDR_VETH1,
+		.tunnel_server_addr = IP4_ADDR_VETH2,
+	},
+	{ /* vxlan_eth */
+		.ebpf_tun_type = "vxlan",
+		.mac_tun_type = "eth",
+		.iproute_tun_type = "vxlan",
+		.ipproto = 4,
+		.extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
+		.tunnel_need_veth_mac = true
+	},
+	{ /* ip6vxlan_eth */
+		.ebpf_tun_type = "ip6vxlan",
+		.mac_tun_type = "eth",
+		.iproute_tun_type = "vxlan",
+		.ipproto = 6,
+		.extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
+		.tunnel_need_veth_mac = true
+	},
+	{ /* gre_none */
+		.ebpf_tun_type = "gre",
+		.mac_tun_type = "none",
+		.iproute_tun_type = "gre",
+		.ipproto = 4,
+		.test_gso = true
+	},
+	{ /* gre_eth */
+		.ebpf_tun_type = "gre",
+		.mac_tun_type = "eth",
+		.iproute_tun_type = "gretap",
+		.ipproto = 4,
+		.tunnel_need_veth_mac = true,
+		.test_gso = true
+	},
+	{ /* gre_mpls */
+		.ebpf_tun_type = "gre",
+		.mac_tun_type = "mpls",
+		.iproute_tun_type = "gre",
+		.ipproto = 4,
+		.configure_mpls = true,
+		.test_gso = true
+	},
+	{ /* ip6gre_none */
+		.ebpf_tun_type = "ip6gre",
+		.mac_tun_type = "none",
+		.iproute_tun_type = "ip6gre",
+		.ipproto = 6,
+		.test_gso = true,
+	},
+	{ /* ip6gre_eth */
+		.ebpf_tun_type = "ip6gre",
+		.mac_tun_type = "eth",
+		.iproute_tun_type = "ip6gretap",
+		.ipproto = 6,
+		.tunnel_need_veth_mac = true,
+		.test_gso = true
+	},
+	{ /* ip6gre_mpls */
+		.ebpf_tun_type = "ip6gre",
+		.mac_tun_type = "mpls",
+		.iproute_tun_type = "ip6gre",
+		.ipproto = 6,
+		.configure_mpls = true,
+		.test_gso = true
+	},
+	{ /* udp_none */
+		.ebpf_tun_type = "udp",
+		.mac_tun_type = "none",
+		.iproute_tun_type = "ipip",
+		.ipproto = 4,
+		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+		.configure_fou_rx_port = true,
+		.test_gso = true
+	},
+	{ /* udp_eth: kernel decapsulation step is skipped */
+		.ebpf_tun_type = "udp",
+		.mac_tun_type = "eth",
+		.iproute_tun_type = "ipip",
+		.ipproto = 4,
+		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+		.configure_fou_rx_port = true,
+		.expect_kern_decap_failure = true,
+		.test_gso = true
+	},
+	{ /* udp_mpls */
+		.ebpf_tun_type = "udp",
+		.mac_tun_type = "mpls",
+		.iproute_tun_type = "ipip",
+		.ipproto = 4,
+		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+		.configure_fou_rx_port = true,
+		.tmode = "mode any ttl 255",
+		.configure_mpls = true,
+		.test_gso = true
+	},
+	{ /* ip6udp_none */
+		.ebpf_tun_type = "ip6udp",
+		.mac_tun_type = "none",
+		.iproute_tun_type = "ip6tnl",
+		.ipproto = 6,
+		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+		.configure_fou_rx_port = true,
+		.test_gso = true
+	},
+	{ /* ip6udp_eth: kernel decapsulation step is skipped */
+		.ebpf_tun_type = "ip6udp",
+		.mac_tun_type = "eth",
+		.iproute_tun_type = "ip6tnl",
+		.ipproto = 6,
+		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+		.configure_fou_rx_port = true,
+		.expect_kern_decap_failure = true,
+		.test_gso = true
+	},
+	{ /* ip6udp_mpls: kernel decapsulation step is skipped */
+		.ebpf_tun_type = "ip6udp",
+		.mac_tun_type = "mpls",
+		.iproute_tun_type = "ip6tnl",
+		.ipproto = 6,
+		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+		.configure_fou_rx_port = true,
+		.tmode = "mode any ttl 255",
+		.expect_kern_decap_failure = true,
+		.test_gso = true
+	},
+};
+
+/* Test entry point: run every entry of subtests_cfg as its own subtest. */
+void test_tc_tunnel(void)
+{
+	struct test_tc_tunnel *skel = test_tc_tunnel__open_and_load();
+	struct subtest_cfg *cfg;
+	int i, ret;
+
+	if (!ASSERT_OK_PTR(skel, "skel open and load"))
+		return;
+	if (!ASSERT_OK(setup(), "global setup"))
+		goto destroy; /* setup() removed its own netns; free the skeleton only */
+	for (i = 0; i < ARRAY_SIZE(subtests_cfg); i++) {
+		cfg = &subtests_cfg[i];
+		ret = build_subtest_name(cfg, cfg->name, TEST_NAME_MAX_LEN);
+		if (ret < 0 || !test__start_subtest(cfg->name))
+			continue;
+		if (subtest_setup(skel, cfg) == 0)
+			run_test(cfg);
+		subtest_cleanup(cfg);
+	}
+	cleanup();
+destroy:
+	test_tc_tunnel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index bae0e9de277d..eb9309931272 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -534,85 +534,6 @@ static void ping6_dev1(void)
 	close_netns(nstoken);
 }
 
-static int attach_tc_prog(int ifindex, int igr_fd, int egr_fd)
-{
-	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
-			    .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
-	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
-			    .priority = 1, .prog_fd = igr_fd);
-	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
-			    .priority = 1, .prog_fd = egr_fd);
-	int ret;
-
-	ret = bpf_tc_hook_create(&hook);
-	if (!ASSERT_OK(ret, "create tc hook"))
-		return ret;
-
-	if (igr_fd >= 0) {
-		hook.attach_point = BPF_TC_INGRESS;
-		ret = bpf_tc_attach(&hook, &opts1);
-		if (!ASSERT_OK(ret, "bpf_tc_attach")) {
-			bpf_tc_hook_destroy(&hook);
-			return ret;
-		}
-	}
-
-	if (egr_fd >= 0) {
-		hook.attach_point = BPF_TC_EGRESS;
-		ret = bpf_tc_attach(&hook, &opts2);
-		if (!ASSERT_OK(ret, "bpf_tc_attach")) {
-			bpf_tc_hook_destroy(&hook);
-			return ret;
-		}
-	}
-
-	return 0;
-}
-
-static int generic_attach(const char *dev, int igr_fd, int egr_fd)
-{
-	int ifindex;
-
-	if (!ASSERT_OK_FD(igr_fd, "check ingress fd"))
-		return -1;
-	if (!ASSERT_OK_FD(egr_fd, "check egress fd"))
-		return -1;
-
-	ifindex = if_nametoindex(dev);
-	if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
-		return -1;
-
-	return attach_tc_prog(ifindex, igr_fd, egr_fd);
-}
-
-static int generic_attach_igr(const char *dev, int igr_fd)
-{
-	int ifindex;
-
-	if (!ASSERT_OK_FD(igr_fd, "check ingress fd"))
-		return -1;
-
-	ifindex = if_nametoindex(dev);
-	if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
-		return -1;
-
-	return attach_tc_prog(ifindex, igr_fd, -1);
-}
-
-static int generic_attach_egr(const char *dev, int egr_fd)
-{
-	int ifindex;
-
-	if (!ASSERT_OK_FD(egr_fd, "check egress fd"))
-		return -1;
-
-	ifindex = if_nametoindex(dev);
-	if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
-		return -1;
-
-	return attach_tc_prog(ifindex, -1, egr_fd);
-}
-
 static void test_vxlan_tunnel(void)
 {
 	struct test_tunnel_kern *skel = NULL;
@@ -635,12 +556,12 @@ static void test_vxlan_tunnel(void)
 		goto done;
 	get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src);
 	set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src);
-	if (generic_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
+	if (tc_prog_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
 		goto done;
 
 	/* load and attach bpf prog to veth dev tc hook point */
 	set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst);
-	if (generic_attach_igr("veth1", set_dst_prog_fd))
+	if (tc_prog_attach("veth1", set_dst_prog_fd, -1))
 		goto done;
 
 	/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
@@ -648,7 +569,7 @@ static void test_vxlan_tunnel(void)
 	if (!ASSERT_OK_PTR(nstoken, "setns src"))
 		goto done;
 	set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst);
-	if (generic_attach_egr(VXLAN_TUNL_DEV0, set_dst_prog_fd))
+	if (tc_prog_attach(VXLAN_TUNL_DEV0, -1, set_dst_prog_fd))
 		goto done;
 	close_netns(nstoken);
 
@@ -695,7 +616,7 @@ static void test_ip6vxlan_tunnel(void)
 		goto done;
 	get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src);
 	set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src);
-	if (generic_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
+	if (tc_prog_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
 		goto done;
 
 	/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
@@ -703,7 +624,7 @@ static void test_ip6vxlan_tunnel(void)
 	if (!ASSERT_OK_PTR(nstoken, "setns src"))
 		goto done;
 	set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst);
-	if (generic_attach_egr(IP6VXLAN_TUNL_DEV0, set_dst_prog_fd))
+	if (tc_prog_attach(IP6VXLAN_TUNL_DEV0, -1, set_dst_prog_fd))
 		goto done;
 	close_netns(nstoken);
 
@@ -764,7 +685,7 @@ static void test_ipip_tunnel(enum ipip_encap encap)
 			skel->progs.ipip_set_tunnel);
 	}
 
-	if (generic_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
+	if (tc_prog_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
 		goto done;
 
 	ping_dev0();
@@ -797,7 +718,7 @@ static void test_xfrm_tunnel(void)
 
 	/* attach tc prog to tunnel dev */
 	tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state);
-	if (generic_attach_igr("veth1", tc_prog_fd))
+	if (tc_prog_attach("veth1", tc_prog_fd, -1))
 		goto done;
 
 	/* attach xdp prog to tunnel dev */
@@ -870,7 +791,7 @@ static void test_gre_tunnel(enum gre_test test)
 	if (!ASSERT_OK(err, "add tunnel"))
 		goto done;
 
-	if (generic_attach(GRE_TUNL_DEV1, get_fd, set_fd))
+	if (tc_prog_attach(GRE_TUNL_DEV1, get_fd, set_fd))
 		goto done;
 
 	ping_dev0();
@@ -911,7 +832,7 @@ static void test_ip6gre_tunnel(enum ip6gre_test test)
 
 	set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel);
 	get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel);
-	if (generic_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd))
+	if (tc_prog_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd))
 		goto done;
 
 	ping6_veth0();
@@ -954,7 +875,7 @@ static void test_erspan_tunnel(enum erspan_test test)
 
 	set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel);
 	get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel);
-	if (generic_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd))
+	if (tc_prog_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd))
 		goto done;
 
 	ping_dev0();
@@ -990,7 +911,7 @@ static void test_ip6erspan_tunnel(enum erspan_test test)
 
 	set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel);
 	get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel);
-	if (generic_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd))
+	if (tc_prog_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd))
 		goto done;
 
 	ping6_veth0();
@@ -1017,7 +938,7 @@ static void test_geneve_tunnel(void)
 
 	set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel);
 	get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel);
-	if (generic_attach(GENEVE_TUNL_DEV1, get_fd, set_fd))
+	if (tc_prog_attach(GENEVE_TUNL_DEV1, get_fd, set_fd))
 		goto done;
 
 	ping_dev0();
@@ -1044,7 +965,7 @@ static void test_ip6geneve_tunnel(void)
 
 	set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel);
 	get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel);
-	if (generic_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd))
+	if (tc_prog_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd))
 		goto done;
 
 	ping_dev0();
@@ -1083,7 +1004,7 @@ static void test_ip6tnl_tunnel(enum ip6tnl_test test)
 		get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel);
 		break;
 	}
-	if (generic_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd))
+	if (tc_prog_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd))
 		goto done;
 
 	ping6_veth0();
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
new file mode 100644
index 000000000000..5af28f359cfd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
@@ -0,0 +1,2596 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/if_link.h>
+#include <linux/mman.h>
+#include <linux/netdev.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "network_helpers.h"
+#include "test_xsk.h"
+#include "xsk_xdp_common.h"
+#include "xsk_xdp_progs.skel.h"
+
+#define DEFAULT_BATCH_SIZE		64
+#define MIN_PKT_SIZE			64
+#define MAX_ETH_JUMBO_SIZE		9000
+#define MAX_INTERFACES			2
+#define MAX_TEARDOWN_ITER		10
+#define MAX_TX_BUDGET_DEFAULT		32
+#define PKT_DUMP_NB_TO_PRINT		16
+/* Just to align the data in the packet */
+#define PKT_HDR_SIZE			(sizeof(struct ethhdr) + 2)
+#define POLL_TMOUT			1000
+#define THREAD_TMOUT			3
+#define UMEM_HEADROOM_TEST_SIZE		128
+#define XSK_DESC__INVALID_OPTION	(0xffff)
+#define XSK_UMEM__INVALID_FRAME_SIZE	(MAX_ETH_JUMBO_SIZE + 1)
+#define XSK_UMEM__LARGE_FRAME_SIZE	(3 * 1024)
+#define XSK_UMEM__MAX_FRAME_SIZE	(4 * 1024)
+
+static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
+
+bool opt_verbose;
+pthread_barrier_t barr;
+pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+int pkts_in_flight;
+
+/* The payload is a word consisting of a packet sequence number in the upper
+ * 16 bits and an intra-packet data sequence number in the lower 16 bits. So the 3rd packet's
+ * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
+ */
+static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
+{
+	u32 first_word = start / sizeof(u32), nb_words = size / sizeof(u32);
+	u32 *word = dest, seq;
+
+	/* Each word combines the packet number with the running word index */
+	for (seq = 0; seq < nb_words; seq++)
+		word[seq] = htonl(pkt_nb << 16 | (seq + first_word));
+}
+
+static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
+{
+	eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
+	memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
+	memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
+}
+
+static bool is_umem_valid(struct ifobject *ifobj)
+{
+	return ifobj->umem->umem != NULL;
+}
+
+static u32 mode_to_xdp_flags(enum test_mode mode)
+{
+	return (mode != TEST_MODE_SKB) ? XDP_FLAGS_DRV_MODE : XDP_FLAGS_SKB_MODE;
+}
+
+static u64 umem_size(struct xsk_umem_info *umem)
+{
+	return umem->frame_size * umem->num_frames;
+}
+
+int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
+			      u64 size)
+{
+	/* Ring sizes requested through @umem override the library defaults */
+	struct xsk_umem_config cfg = {
+		.fill_size = umem->fill_size ? umem->fill_size : XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.comp_size = umem->comp_size ? umem->comp_size : XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.frame_size = umem->frame_size,
+		.frame_headroom = umem->frame_headroom,
+		.flags = XSK_UMEM__DEFAULT_FLAGS
+	};
+	int err;
+
+	if (umem->unaligned_mode)
+		cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+
+	err = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, &cfg);
+	if (err)
+		return err;
+
+	umem->buffer = buffer;
+
+	/* The Rx side of a shared umem allocates its buffers from base_addr,
+	 * which is set one umem_size() into the area. Every other case leaves
+	 * base_addr and next_buffer untouched.
+	 */
+	if (!ifobj->shared_umem || !ifobj->rx_on)
+		return 0;
+
+	umem->base_addr = umem_size(umem);
+	umem->next_buffer = umem_size(umem);
+
+	return 0;
+}
+
+static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
+{
+	u64 region_end = umem->base_addr + umem_size(umem);
+	u64 allocated = umem->next_buffer;
+
+	/* Advance by one frame and wrap around to the start of the region */
+	umem->next_buffer += umem->frame_size;
+	if (umem->next_buffer >= region_end)
+		umem->next_buffer = umem->base_addr;
+	return allocated;
+}
+
+static void umem_reset_alloc(struct xsk_umem_info *umem)
+{
+	umem->next_buffer = 0; /* NOTE(review): restarts at 0, not at umem->base_addr */
+}
+
+static int enable_busy_poll(struct xsk_socket_info *xsk)
+{
+	/* Applied in this order; the first option that fails aborts the setup */
+	const int opts[][2] = {
+		{ SO_PREFER_BUSY_POLL, 1 },
+		{ SO_BUSY_POLL, 20 },
+		{ SO_BUSY_POLL_BUDGET, xsk->batch_size },
+	};
+	int fd = xsk_socket__fd(xsk->xsk);
+	u32 i;
+
+	for (i = 0; i < sizeof(opts) / sizeof(opts[0]); i++) {
+		int sock_opt = opts[i][1];
+
+		if (setsockopt(fd, SOL_SOCKET, opts[i][0],
+			       (void *)&sock_opt, sizeof(sock_opt)) < 0)
+			return -errno;
+	}
+
+	return 0;
+}
+
+int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+				  struct ifobject *ifobject, bool shared)
+{
+	struct xsk_ring_cons *rxr = ifobject->rx_on ? &xsk->rx : NULL;
+	struct xsk_ring_prod *txr = ifobject->tx_on ? &xsk->tx : NULL;
+	struct xsk_socket_config cfg = {};
+
+	xsk->umem = umem;
+
+	/* Ring sizes set on the umem take precedence over the socket defaults */
+	cfg.rx_size = umem->fill_size ? umem->fill_size : xsk->rxqsize;
+	cfg.tx_size = umem->comp_size ? umem->comp_size : XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+	cfg.bind_flags = ifobject->bind_flags;
+	if (shared)
+		cfg.bind_flags |= XDP_SHARED_UMEM;
+
+	/* An MTU above MAX_ETH_PKT_SIZE makes the socket bind with XDP_USE_SG */
+	if (ifobject->mtu > MAX_ETH_PKT_SIZE)
+		cfg.bind_flags |= XDP_USE_SG;
+
+	/* Only the rings of the enabled directions are handed to the socket */
+	return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg);
+}
+
+#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
+static unsigned int get_max_skb_frags(void)
+{
+	FILE *sysctl = fopen(MAX_SKB_FRAGS_PATH, "r");
+	unsigned int nb_frags = 0;
+
+	if (!sysctl) {
+		ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
+		return 0;
+	}
+
+	/* nb_frags keeps its initial 0 when the value cannot be parsed */
+	if (fscanf(sysctl, "%u", &nb_frags) != 1)
+		ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
+
+	fclose(sysctl);
+	return nb_frags;
+}
+
+static int set_ring_size(struct ifobject *ifobj)
+{
+	u32 attempt;
+	int err;
+
+	for (attempt = 1; attempt <= SOCK_RECONF_CTR; attempt++) {
+		err = set_hw_ring_size(ifobj->ifname, &ifobj->ring);
+		if (!err)
+			break;
+
+		/* Only EBUSY is retried, and only while attempts remain */
+		if (attempt >= SOCK_RECONF_CTR || errno != EBUSY)
+			return -errno;
+
+		usleep(USLEEP_MAX);
+	}
+
+	return err;
+}
+
+int hw_ring_size_reset(struct ifobject *ifobj)
+{
+	ifobj->ring.rx_pending = ifobj->set_ring.default_rx;
+	ifobj->ring.tx_pending = ifobj->set_ring.default_tx;
+	return set_ring_size(ifobj);
+}
+
+static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+			     struct ifobject *ifobj_rx)
+{
+	u32 i, j;
+
+	for (i = 0; i < MAX_INTERFACES; i++) {
+		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+		struct pkt_stream *def_stream;
+
+		/* Interface 0 is the Tx side, the other one is the Rx side */
+		ifobj->tx_on = (i == 0);
+		ifobj->rx_on = (i != 0);
+		def_stream = i ? test->rx_pkt_stream_default : test->tx_pkt_stream_default;
+
+		ifobj->validation_func = NULL;
+		ifobj->use_metadata = false;
+		ifobj->use_poll = false;
+		ifobj->use_fill_ring = true;
+		ifobj->release_rx = true;
+		ifobj->xsk = &ifobj->xsk_arr[0];
+
+		memset(ifobj->umem, 0, sizeof(*ifobj->umem));
+		ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+		ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
+
+		for (j = 0; j < MAX_SOCKETS; j++) {
+			struct xsk_socket_info *xsk = &ifobj->xsk_arr[j];
+
+			memset(xsk, 0, sizeof(*xsk));
+			xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+			xsk->batch_size = DEFAULT_BATCH_SIZE;
+			xsk->pkt_stream = def_stream;
+
+			/* Socket j uses g_mac with 2j added to the last byte as
+			 * source and with 2j + 1 added as destination.
+			 */
+			memcpy(xsk->src_mac, g_mac, ETH_ALEN);
+			memcpy(xsk->dst_mac, g_mac, ETH_ALEN);
+			xsk->src_mac[5] += ((j * 2) + 0);
+			xsk->dst_mac[5] += ((j * 2) + 1);
+		}
+	}
+
+	if (ifobj_tx->hw_ring_size_supp)
+		hw_ring_size_reset(ifobj_tx);
+
+	test->ifobj_tx = ifobj_tx;
+	test->ifobj_rx = ifobj_rx;
+	test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
+	test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk;
+	test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
+	test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
+	test->mtu = MAX_ETH_PKT_SIZE;
+	test->current_step = 0;
+	test->total_steps = 1;
+	test->nb_sockets = 1;
+	test->fail = false;
+	test->set_ring = false;
+	test->adjust_tail = false;
+	test->adjust_tail_support = false;
+}
+
+void test_init(struct test_spec *test, struct ifobject *ifobj_tx,
+			   struct ifobject *ifobj_rx, enum test_mode mode,
+			   const struct test_spec *test_to_run)
+{
+	/* The default packet streams are the only state of @test that is kept
+	 * across the memset() below.
+	 */
+	struct pkt_stream *saved_tx = test->tx_pkt_stream_default;
+	struct pkt_stream *saved_rx = test->rx_pkt_stream_default;
+	u32 i;
+
+	memset(test, 0, sizeof(*test));
+	test->tx_pkt_stream_default = saved_tx;
+	test->rx_pkt_stream_default = saved_rx;
+
+	/* Both interfaces bind with need_wakeup plus the copy mode of @mode */
+	for (i = 0; i < MAX_INTERFACES; i++) {
+		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+		ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
+		ifobj->bind_flags |= (mode == TEST_MODE_ZC) ? XDP_ZEROCOPY : XDP_COPY;
+	}
+
+	/* Take over the identity of the test to run, then apply the defaults */
+	test->mode = mode;
+	test->test_func = test_to_run->test_func;
+	memcpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
+	__test_spec_init(test, ifobj_tx, ifobj_rx);
+}
+
+static void test_spec_reset(struct test_spec *test)
+{
+	__test_spec_init(test, test->ifobj_tx, test->ifobj_rx); /* same ifobjects as before */
+}
+
+static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
+				   struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
+				   struct bpf_map *xskmap_tx)
+{
+	test->xdp_prog_rx = xdp_prog_rx;
+	test->xskmap_rx = xskmap_rx;
+	test->xdp_prog_tx = xdp_prog_tx;
+	test->xskmap_tx = xskmap_tx;
+}
+
+static int test_spec_set_mtu(struct test_spec *test, int mtu)
+{
+	/* Rx is updated before Tx; the first failure is returned as is */
+	struct ifobject *ifobjs[] = { test->ifobj_rx, test->ifobj_tx };
+	u32 i;
+
+	for (i = 0; i < sizeof(ifobjs) / sizeof(ifobjs[0]); i++) {
+		struct ifobject *ifobj = ifobjs[i];
+		int err;
+
+		if (ifobj->mtu == mtu)
+			continue;
+		err = xsk_set_mtu(ifobj->ifindex, mtu);
+		if (err)
+			return err;
+		ifobj->mtu = mtu;
+	}
+	return 0;
+}
+
+void pkt_stream_reset(struct pkt_stream *pkt_stream)
+{
+	if (!pkt_stream)
+		return;
+
+	pkt_stream->nb_rx_pkts = 0;
+	pkt_stream->current_pkt_nb = 0;
+}
+
+static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
+{
+	/* NULL once every entry of the stream has been handed out */
+	if (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts)
+		return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+	return NULL;
+}
+
+static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
+{
+	while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
+		struct pkt *candidate = &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+		*pkts_sent += 1; /* skipped invalid entries are counted as well */
+		if (candidate->valid)
+			return candidate;
+	}
+	return NULL;
+}
+
+void pkt_stream_delete(struct pkt_stream *pkt_stream)
+{
+	free(pkt_stream ? pkt_stream->pkts : NULL); /* tolerate NULL like free() does */
+	free(pkt_stream);
+}
+
+void pkt_stream_restore_default(struct test_spec *test)
+{
+	struct xsk_socket_info *tx_xsk = test->ifobj_tx->xsk;
+	struct xsk_socket_info *rx_xsk = test->ifobj_rx->xsk;
+	struct pkt_stream *cur_tx = tx_xsk->pkt_stream, *cur_rx = rx_xsk->pkt_stream;
+
+	if (cur_tx != test->tx_pkt_stream_default) {
+		pkt_stream_delete(tx_xsk->pkt_stream);
+		tx_xsk->pkt_stream = test->tx_pkt_stream_default;
+	}
+	if (cur_rx != test->rx_pkt_stream_default) {
+		pkt_stream_delete(rx_xsk->pkt_stream);
+		rx_xsk->pkt_stream = test->rx_pkt_stream_default;
+	}
+}
+
+static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
+{
+	struct pkt_stream *stream = calloc(1, sizeof(*stream));
+
+	if (!stream)
+		return NULL;
+
+	/* Zeroed array: every entry starts out invalid with a length of 0 */
+	stream->pkts = calloc(nb_pkts, sizeof(*stream->pkts));
+	if (stream->pkts) {
+		stream->nb_pkts = nb_pkts;
+		return stream;
+	}
+
+	free(stream);
+	return NULL;
+}
+
+static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
+{
+	u32 nb_frags = 1, idx;
+
+	if (!pkt)
+		return 1;
+
+	if (!pkt_stream->verbatim) {
+		bool single = !pkt->valid || !pkt->len;
+
+		return single ? 1 : ceil_u32(pkt->len, frame_size);
+	}
+
+	/* Verbatim mode: each stream entry is one frag, so walk the entries
+	 * that follow until the packet ends or an invalid entry is found.
+	 */
+	if (!pkt_continues(pkt->options))
+		return nb_frags;
+
+	for (idx = pkt_stream->current_pkt_nb; idx < pkt_stream->nb_pkts; idx++) {
+		pkt++;
+		nb_frags++;
+		if (!pkt_continues(pkt->options) || !pkt->valid)
+			break;
+	}
+	return nb_frags;
+}
+
+static bool set_pkt_valid(int offset, u32 len)
+{
+	return len <= MAX_ETH_JUMBO_SIZE; /* @offset is not taken into account */
+}
+
+static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+	pkt->valid = set_pkt_valid(offset, len);
+	pkt->len = len;
+	pkt->offset = offset;
+}
+
+static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+	bool was_valid = pkt->valid;
+
+	pkt_set(pkt_stream, pkt, offset, len);
+	pkt_stream->nb_valid_entries += pkt->valid - was_valid; /* +1, 0 or -1 */
+}
+
+static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
+{
+	return umem->frame_size * ceil_u32(len, umem->frame_size);
+}
+
+static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
+{
+	struct pkt_stream *stream = __pkt_stream_alloc(nb_pkts);
+	u32 idx, pkt_nb = nb_start;
+
+	if (!stream)
+		return NULL;
+
+	stream->max_pkt_len = pkt_len;
+	stream->nb_pkts = nb_pkts;
+
+	/* All packets share one length; numbers advance by @nb_off from @nb_start */
+	for (idx = 0; idx < nb_pkts; idx++, pkt_nb += nb_off) {
+		struct pkt *pkt = &stream->pkts[idx];
+
+		pkt_stream_pkt_set(stream, pkt, 0, pkt_len);
+		pkt->pkt_nb = pkt_nb;
+	}
+	return stream;
+}
+
+struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+{
+	return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1); /* numbered 0, 1, 2, ... */
+}
+
+static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
+{
+	/* Only the packet count and the length of the first packet are carried over */
+	return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
+
+static int pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len)
+{
+	struct pkt_stream *stream = pkt_stream_generate(nb_pkts, pkt_len);
+
+	/* The previous stream pointer is overwritten, even on failure */
+	ifobj->xsk->pkt_stream = stream;
+
+	return stream ? 0 : -ENOMEM;
+}
+
+static int pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+	int err = pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len);
+
+	/* Rx is only replaced once Tx succeeded */
+	if (!err)
+		err = pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len);
+
+	return err;
+}
+
+static int __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
+				      int offset)
+{
+	struct pkt_stream *old = ifobj->xsk->pkt_stream;
+	struct pkt_stream *half = pkt_stream_clone(old);
+	u32 odd;
+
+	if (!half)
+		return -ENOMEM;
+
+	/* Every odd-indexed packet gets the new offset and length */
+	for (odd = 1; odd < old->nb_pkts; odd += 2)
+		pkt_stream_pkt_set(half, &half->pkts[odd], offset, pkt_len);
+
+	ifobj->xsk->pkt_stream = half;
+	return 0;
+}
+
+static int pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
+{
+	int err;
+
+	err = __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset);
+	if (!err)
+		err = __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset);
+	return err;
+}
+
+static int pkt_stream_receive_half(struct test_spec *test)
+{
+	struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+	u32 i;
+
+	/* A stream that already replaced the default one has to be released first */
+	if (test->ifobj_rx->xsk->pkt_stream != test->rx_pkt_stream_default)
+		pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
+
+	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
+							      pkt_stream->pkts[0].len);
+	if (!test->ifobj_rx->xsk->pkt_stream)
+		return -ENOMEM;
+
+	/* Drop every odd entry and count each drop individually so that the
+	 * number of valid entries is exact for odd stream sizes too.
+	 */
+	pkt_stream = test->ifobj_rx->xsk->pkt_stream;
+	for (i = 1; i < pkt_stream->nb_pkts; i += 2) {
+		pkt_stream->nb_valid_entries -= pkt_stream->pkts[i].valid;
+		pkt_stream->pkts[i].valid = false;
+	}
+
+	return 0;
+}
+
+static int pkt_stream_even_odd_sequence(struct test_spec *test)
+{
+	struct ifobject *ifobjs[] = { test->ifobj_tx, test->ifobj_rx };
+	struct pkt_stream *cur, *half;
+	u32 sock, side;
+
+	/* Socket i gets half as many packets as its current stream holds,
+	 * numbered i, i + 2, i + 4, ... on the Tx and then on the Rx side.
+	 */
+	for (sock = 0; sock < test->nb_sockets; sock++) {
+		for (side = 0; side < 2; side++) {
+			cur = ifobjs[side]->xsk_arr[sock].pkt_stream;
+			half = __pkt_stream_generate(cur->nb_pkts / 2, cur->pkts[0].len,
+						     sock, 2);
+			if (!half)
+				return -ENOMEM;
+
+			ifobjs[side]->xsk_arr[sock].pkt_stream = half;
+		}
+	}
+
+	return 0;
+}
+
+static void release_even_odd_sequence(struct test_spec *test)
+{
+	struct pkt_stream *keep_tx = test->ifobj_tx->xsk->pkt_stream;
+	struct pkt_stream *keep_rx = test->ifobj_rx->xsk->pkt_stream;
+	int sock;
+
+	/* The streams attached to ifobj->xsk are left for restore_default() */
+	for (sock = 0; sock < test->nb_sockets; sock++) {
+		if (test->ifobj_tx->xsk_arr[sock].pkt_stream != keep_tx)
+			pkt_stream_delete(test->ifobj_tx->xsk_arr[sock].pkt_stream);
+
+		if (test->ifobj_rx->xsk_arr[sock].pkt_stream != keep_rx)
+			pkt_stream_delete(test->ifobj_rx->xsk_arr[sock].pkt_stream);
+	}
+}
+
+static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
+{
+	u64 base = pkt->valid ? umem_alloc_buffer(umem) : 0;
+
+	return pkt->offset + base;
+}
+
+static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
+{
+	pkt_stream->current_pkt_nb--; /* step back by one entry */
+}
+
+static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
+			 u32 pkt_nb, u32 bytes_written)
+{
+	void *data;
+
+	if (len < MIN_PKT_SIZE)
+		return;
+
+	data = xsk_umem__get_data(umem->buffer, addr);
+	if (bytes_written) {
+		/* Later frag: the payload position does not count the header */
+		bytes_written -= PKT_HDR_SIZE;
+	} else {
+		gen_eth_hdr(xsk, data);
+		data += PKT_HDR_SIZE;
+		len -= PKT_HDR_SIZE;
+	}
+	write_payload(data, pkt_nb, bytes_written, len);
+}
+
+static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
+						       u32 nb_frames, bool verbatim)
+{
+	u32 i, len = 0, pkt_nb = 0, payload = 0;
+	struct pkt_stream *pkt_stream;
+
+	pkt_stream = __pkt_stream_alloc(nb_frames);
+	if (!pkt_stream)
+		return NULL;
+
+	for (i = 0; i < nb_frames; i++) {
+		struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
+		struct pkt *frame = &frames[i];
+
+		pkt->offset = frame->offset;
+		if (verbatim) {
+			*pkt = *frame; /* one stream entry per input frame */
+			pkt->pkt_nb = payload;
+			if (!frame->valid || !pkt_continues(frame->options))
+				payload++; /* this frame ends the current packet */
+		} else {
+			if (frame->valid)
+				len += frame->len;
+			if (frame->valid && pkt_continues(frame->options))
+				continue; /* keep accumulating len into the same entry */
+
+			pkt->pkt_nb = pkt_nb;
+			pkt->len = len;
+			pkt->valid = frame->valid;
+			pkt->options = 0;
+
+			len = 0;
+		}
+
+		print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
+			      pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
+
+		if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
+			pkt_stream->max_pkt_len = pkt->len;
+
+		if (pkt->valid)
+			pkt_stream->nb_valid_entries++;
+
+		pkt_nb++; /* one stream entry emitted */
+	}
+
+	pkt_stream->nb_pkts = pkt_nb; /* less than nb_frames when frames were merged */
+	pkt_stream->verbatim = verbatim;
+	return pkt_stream;
+}
+
+static int pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
+{
+	struct pkt_stream *tx_stream, *rx_stream;
+
+	/* Tx gets the frames verbatim; Rx gets them merged into whole packets */
+	tx_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
+	if (!tx_stream)
+		return -ENOMEM;
+	test->ifobj_tx->xsk->pkt_stream = tx_stream;
+
+	rx_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
+	if (!rx_stream)
+		return -ENOMEM;
+	test->ifobj_rx->xsk->pkt_stream = rx_stream;
+	return 0;
+}
+
+static void pkt_print_data(u32 *data, u32 cnt)
+{
+	u32 *end = data + cnt;
+
+	/* Each word is printed as "<packet number>:<sequence number> " */
+	for (; data < end; data++) {
+		u32 word = ntohl(*data);
+		u32 pkt_nb = word >> 16;
+		u32 seqnum = word & 0xffff;
+
+		ksft_print_msg("%u:%u ", pkt_nb, seqnum);
+	}
+}
+
+static void pkt_dump(void *pkt, u32 len, bool eth_header)
+{
+	u32 *payload = eth_header ? pkt + PKT_HDR_SIZE : pkt;
+	struct ethhdr *l2 = pkt;
+	u32 byte, *tail;
+
+	if (eth_header) {
+		/* Print the L2 addresses */
+		ksft_print_msg("DEBUG>> L2: dst mac: ");
+		for (byte = 0; byte < ETH_ALEN; byte++)
+			ksft_print_msg("%02X", l2->h_dest[byte]);
+
+		ksft_print_msg("\nDEBUG>> L2: src mac: ");
+		for (byte = 0; byte < ETH_ALEN; byte++)
+			ksft_print_msg("%02X", l2->h_source[byte]);
+	}
+
+	/* Print the first words of the payload... */
+	ksft_print_msg("\nDEBUG>> L5: seqnum: ");
+	pkt_print_data(payload, PKT_DUMP_NB_TO_PRINT);
+	ksft_print_msg("....");
+
+	/* ...and the last ones when @len covers more words than that */
+	if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
+		tail = payload + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT;
+
+		ksft_print_msg("\n.... ");
+		pkt_print_data(tail, PKT_DUMP_NB_TO_PRINT);
+	}
+	ksft_print_msg("\n---------------------------------------\n");
+}
+
+static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
+{
+	u32 actual = addr % umem->frame_size, expected;
+	int pkt_offset = 0;
+	u32 headroom = 0;
+
+	/* Unaligned mode honours the packet offset, aligned mode the frame headroom */
+	if (umem->unaligned_mode)
+		pkt_offset = pkt->valid ? pkt->offset : 0;
+	else
+		headroom = umem->frame_headroom;
+	expected = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
+	if (actual != expected)
+		ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected, actual);
+
+	return actual == expected;
+}
+
+static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
+{
+	/* The metadata is read from right in front of the packet data */
+	struct xdp_info *meta = xsk_umem__get_data(buffer, addr) - sizeof(struct xdp_info);
+
+	if (meta->count == pkt->pkt_nb)
+		return true;
+
+	ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
+		       __func__, pkt->pkt_nb,
+		       (unsigned long long)meta->count);
+
+	return false;
+}
+
+static int is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx, bool *supported)
+{
+	int adjust_value = 0, key = 0;
+	struct bpf_map *bss;
+	int err;
+
+	bss = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
+	if (!bss || !bpf_map__is_internal(bss)) {
+		ksft_print_msg("Error: could not find bss section of XDP program\n");
+		return -EINVAL;
+	}
+
+	err = bpf_map_lookup_elem(bpf_map__fd(bss), &key, &adjust_value);
+	if (err) {
+		ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", err);
+		return err;
+	}
+
+	/* The XDP program stores -EOPNOTSUPP in its 'adjust_value' variable when
+	 * the adjust_tail helper is not supported, in which case the adjust_tail
+	 * test case has to be skipped.
+	 */
+	*supported = (adjust_value != -EOPNOTSUPP);
+
+	return 0;
+}
+
+static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
+			  u32 bytes_processed)
+{
+	u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
+	void *data = xsk_umem__get_data(umem->buffer, addr);
+
+	addr -= umem->base_addr; /* bounds below are checked relative to base_addr */
+
+	if (addr >= umem->num_frames * umem->frame_size ||
+	    addr + len > umem->num_frames * umem->frame_size) {
+		ksft_print_msg("Frag invalid addr: %llx len: %u\n",
+			       (unsigned long long)addr, len);
+		return false;
+	}
+	if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
+		ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
+			       (unsigned long long)addr, len);
+		return false;
+	}
+
+	pkt_data = data;
+	if (!bytes_processed) {
+		pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data); /* first frag: skip the header */
+		len -= PKT_HDR_SIZE;
+	} else {
+		bytes_processed -= PKT_HDR_SIZE; /* payload position excludes the header */
+	}
+
+	expected_seqnum = bytes_processed / sizeof(*pkt_data);
+	seqnum = ntohl(*pkt_data) & 0xffff;
+	pkt_nb = ntohl(*pkt_data) >> 16;
+
+	if (expected_pkt_nb != pkt_nb) {
+		ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
+			       __func__, expected_pkt_nb, pkt_nb);
+		goto error;
+	}
+	if (expected_seqnum != seqnum) {
+		ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
+			       __func__, expected_seqnum, seqnum);
+		goto error;
+	}
+
+	words_to_end = len / sizeof(*pkt_data) - 1; /* index of the last payload word */
+	pkt_data += words_to_end;
+	seqnum = ntohl(*pkt_data) & 0xffff;
+	expected_seqnum += words_to_end;
+	if (expected_seqnum != seqnum) {
+		ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
+			       __func__, expected_seqnum, seqnum);
+		goto error;
+	}
+
+	return true;
+
+error:
+	pkt_dump(data, len, !bytes_processed); /* len/bytes_processed as adjusted above */
+	return false;
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
+{
+	if (pkt->len == len)
+		return true;
+
+	/* Length mismatch: report it together with a dump of the received data */
+	ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
+		       __func__, pkt->len, len);
+	pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
+	return false;
+}
+
+static u32 load_value(u32 *counter)
+{
+	return __atomic_load_n(counter, __ATOMIC_ACQUIRE); /* atomic read, acquire ordering */
+}
+
+static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret)
+{
+	u32 budget = MAX_TX_BUDGET_DEFAULT;
+	u32 cons_before, pending;
+	int consumed;
+
+	cons_before = load_value(xsk->tx.consumer);
+	pending = load_value(xsk->tx.producer) - cons_before;
+
+	*ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+	consumed = load_value(xsk->tx.consumer) - cons_before;
+
+	/* With more than @budget descs pending, a single send is expected to
+	 * consume exactly @budget of them, because the max budget limit is
+	 * then hit in the while loop of __xsk_generic_xmit(). With fewer descs
+	 * pending, tx.consumer is updated in time by xskq_cons_peek_desc(),
+	 * which hides the issue this check is after, so nothing is verified.
+	 */
+	if (pending > budget && consumed != budget)
+		return false;
+
+	return true;
+}
+
+int kick_tx(struct xsk_socket_info *xsk)
+{
+	int ret;
+
+	if (!xsk->check_consumer)
+		ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+	else if (!kick_tx_with_check(xsk, &ret))
+		return TEST_FAILURE;
+
+	if (ret >= 0)
+		return TEST_PASS;
+	/* These errors are not treated as failures: back off briefly instead */
+	if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
+		usleep(100);
+		return TEST_PASS;
+	}
+	return TEST_FAILURE;
+}
+
+int kick_rx(struct xsk_socket_info *xsk)
+{
+	int fd = xsk_socket__fd(xsk->xsk);
+
+	/* Zero-length, non-blocking receive: only success or failure is used */
+	if (recvfrom(fd, NULL, 0, MSG_DONTWAIT, NULL, NULL) < 0)
+		return TEST_FAILURE;
+
+	return TEST_PASS;
+}
+
+static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
+{
+	struct xsk_ring_cons *cq = &xsk->umem->cq;
+	unsigned int completed;
+	u32 idx;
+
+	/* Kick Tx first when the Tx ring asks for a wakeup */
+	if (xsk_ring_prod__needs_wakeup(&xsk->tx) && kick_tx(xsk))
+		return TEST_FAILURE;
+
+	completed = xsk_ring_cons__peek(cq, batch_size, &idx);
+	if (!completed)
+		return TEST_PASS;
+
+	/* More completions than outstanding Tx descriptors is an error */
+	if (completed > xsk->outstanding_tx) {
+		u64 addr = *xsk_ring_cons__comp_addr(cq, idx + completed - 1);
+
+		ksft_print_msg("[%s] Too many packets completed\n", __func__);
+		ksft_print_msg("Last completion address: %llx\n",
+			       (unsigned long long)addr);
+		return TEST_FAILURE;
+	}
+
+	xsk_ring_cons__release(cq, completed);
+	xsk->outstanding_tx -= completed;
+
+	return TEST_PASS;
+}
+
+/* Receive up to one batch of Rx descs on @xsk and check each frag and completed
+ * packet against the packet stream. TEST_FAILURE is returned on any error.
+ */
+static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
+{
+	u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
+	u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
+	struct pkt_stream *pkt_stream = xsk->pkt_stream;
+	struct ifobject *ifobj = test->ifobj_rx;
+	struct xsk_umem_info *umem = xsk->umem;
+	struct pollfd fds = { .fd = xsk_socket__fd(xsk->xsk), .events = POLLIN };
+	struct pkt *pkt;
+	u64 first_addr = 0;
+	int ret;
+
+	ret = kick_rx(xsk);
+	if (ret)
+		return TEST_FAILURE;
+
+	if (ifobj->use_poll) {
+		ret = poll(&fds, 1, POLL_TMOUT);
+		if (ret < 0)
+			return TEST_FAILURE;
+
+		if (!ret) {
+			if (!is_umem_valid(test->ifobj_tx))
+				return TEST_PASS;
+
+			ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
+			return TEST_CONTINUE;
+		}
+
+		if (!(fds.revents & POLLIN))
+			return TEST_CONTINUE;
+	}
+
+	rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx);
+	if (!rcvd)
+		return TEST_CONTINUE;
+
+	if (ifobj->use_fill_ring) {
+		ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+		while (ret != rcvd) {
+			if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+				ret = poll(&fds, 1, POLL_TMOUT);
+				if (ret < 0)
+					return TEST_FAILURE;
+			}
+			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+		}
+	}
+
+	while (frags_processed < rcvd) {
+		const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+		u64 addr = desc->addr, orig;
+
+		orig = xsk_umem__extract_addr(addr);
+		addr = xsk_umem__add_offset_to_addr(addr);
+
+		if (!nb_frags) {
+			pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
+			if (!pkt) {
+				ksft_print_msg("[%s] received too many packets addr: %llx len %u\n",
+					       __func__, (unsigned long long)addr, desc->len);
+				return TEST_FAILURE;
+			}
+		}
+
+		print_verbose("Rx: addr: %llx len: %u options: %u pkt_nb: %u valid: %u\n",
+			      (unsigned long long)addr, desc->len, desc->options,
+			      pkt->pkt_nb, pkt->valid);
+
+		if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
+		    !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
+		    !is_metadata_correct(pkt, umem->buffer, addr)))
+			return TEST_FAILURE;
+
+		if (!nb_frags++)
+			first_addr = addr;
+		frags_processed++;
+		pkt_len += desc->len;
+		if (ifobj->use_fill_ring)
+			*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+
+		if (pkt_continues(desc->options))
+			continue;
+
+		/* The complete packet has been received */
+		if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
+		    !is_offset_correct(umem, pkt, addr))
+			return TEST_FAILURE;
+
+		pkt_stream->nb_rx_pkts++;
+		nb_frags = 0;
+		pkt_len = 0;
+	}
+
+	if (nb_frags) {
+		/* In the middle of a packet. Start over from beginning of packet. */
+		idx_rx -= nb_frags;
+		xsk_ring_cons__cancel(&xsk->rx, nb_frags);
+		if (ifobj->use_fill_ring) {
+			idx_fq -= nb_frags;
+			xsk_ring_prod__cancel(&umem->fq, nb_frags);
+		}
+		frags_processed -= nb_frags;
+	}
+
+	if (ifobj->use_fill_ring)
+		xsk_ring_prod__submit(&umem->fq, frags_processed);
+	if (ifobj->release_rx)
+		xsk_ring_cons__release(&xsk->rx, frags_processed);
+
+	pthread_mutex_lock(&pacing_mutex);
+	pkts_in_flight -= pkts_sent; /* every stream entry consumed above is accounted */
+	pthread_mutex_unlock(&pacing_mutex);
+
+	return TEST_CONTINUE;
+}
+
+bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
+			  unsigned long *bitmap)
+{
+	struct pkt_stream *stream = xsk->pkt_stream;
+
+	/* A socket without a stream is marked done, but never ends the wait itself */
+	if (!stream) {
+		__set_bit(sock_num, bitmap);
+		return false;
+	}
+
+	if (stream->nb_rx_pkts != stream->nb_valid_entries)
+		return false;
+
+	/* This socket is done; report completion once every socket is */
+	__set_bit(sock_num, bitmap);
+	return bitmap_full(bitmap, test->nb_sockets);
+}
+
+static int receive_pkts(struct test_spec *test)
+{
+	struct timeval deadline, now, timeout = {THREAD_TMOUT, 0};
+	DECLARE_BITMAP(bitmap, test->nb_sockets);
+	struct xsk_socket_info *xsk;
+	u32 sock_num;
+	int res;
+
+	bitmap_zero(bitmap, test->nb_sockets);
+
+	if (gettimeofday(&now, NULL))
+		return TEST_FAILURE;
+
+	timeradd(&now, &timeout, &deadline);
+
+	/* Visit the sockets round-robin until all of them are done, a receive
+	 * fails or the deadline expires.
+	 */
+	for (sock_num = 0; ; sock_num = (sock_num + 1) % test->nb_sockets) {
+		xsk = &test->ifobj_rx->xsk_arr[sock_num];
+
+		if (all_packets_received(test, xsk, sock_num, bitmap))
+			break;
+
+		res = __receive_pkts(test, xsk);
+		if (res != TEST_PASS && res != TEST_CONTINUE)
+			return res;
+
+		if (gettimeofday(&now, NULL))
+			return TEST_FAILURE;
+
+		if (timercmp(&now, &deadline, >)) {
+			ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+			return TEST_FAILURE;
+		}
+	}
+
+	return TEST_PASS;
+}
+
+static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
+{
+	u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
+	struct pkt_stream *pkt_stream = xsk->pkt_stream;
+	struct xsk_umem_info *umem = ifobject->umem;
+	bool use_poll = ifobject->use_poll;
+	struct pollfd fds = { };
+	int ret;
+
+	buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
+	/* pkts_in_flight might be negative if many invalid packets are sent */
+	if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
+	    buffer_len)) {
+		ret = kick_tx(xsk);
+		if (ret)
+			return TEST_FAILURE;
+		return TEST_CONTINUE;
+	}
+
+	fds.fd = xsk_socket__fd(xsk->xsk);
+	fds.events = POLLOUT;
+
+	while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
+		if (use_poll) {
+			ret = poll(&fds, 1, POLL_TMOUT);
+			if (timeout) {
+				if (ret < 0) {
+					ksft_print_msg("ERROR: [%s] Poll error %d\n",
+						       __func__, errno);
+					return TEST_FAILURE;
+				}
+				if (ret == 0)
+					return TEST_PASS;
+				break;
+			}
+			if (ret <= 0) {
+				ksft_print_msg("ERROR: [%s] Poll error %d\n",
+					       __func__, errno);
+				return TEST_FAILURE;
+			}
+		}
+
+		complete_pkts(xsk, xsk->batch_size);
+	}
+
+	for (i = 0; i < xsk->batch_size; i++) {
+		struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+		u32 nb_frags_left, nb_frags, bytes_written = 0;
+
+		if (!pkt)
+			break;
+
+		nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
+		if (nb_frags > xsk->batch_size - i) {
+			pkt_stream_cancel(pkt_stream);
+			xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i);
+			break;
+		}
+		nb_frags_left = nb_frags;
+
+		while (nb_frags_left--) {
+			struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+
+			tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
+			if (pkt_stream->verbatim) {
+				tx_desc->len = pkt->len;
+				tx_desc->options = pkt->options;
+			} else if (nb_frags_left) {
+				tx_desc->len = umem->frame_size;
+				tx_desc->options = XDP_PKT_CONTD;
+			} else {
+				tx_desc->len = pkt->len - bytes_written;
+				tx_desc->options = 0;
+			}
+			if (pkt->valid)
+				pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
+					     bytes_written);
+			bytes_written += tx_desc->len;
+
+			print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
+				      tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
+
+			if (nb_frags_left) {
+				i++;
+				if (pkt_stream->verbatim)
+					pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+			}
+		}
+
+		if (pkt && pkt->valid) {
+			valid_pkts++;
+			valid_frags += nb_frags;
+		}
+	}
+
+	pthread_mutex_lock(&pacing_mutex);
+	pkts_in_flight += valid_pkts;
+	pthread_mutex_unlock(&pacing_mutex);
+
+	xsk_ring_prod__submit(&xsk->tx, i);
+	xsk->outstanding_tx += valid_frags;
+
+	if (use_poll) {
+		ret = poll(&fds, 1, POLL_TMOUT);
+		if (ret <= 0) {
+			if (ret == 0 && timeout)
+				return TEST_PASS;
+
+			ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
+			return TEST_FAILURE;
+		}
+	}
+
+	if (!timeout) {
+		if (complete_pkts(xsk, i))
+			return TEST_FAILURE;
+
+		usleep(10);
+		return TEST_PASS;
+	}
+
+	return TEST_CONTINUE;
+}
+
+static int wait_for_tx_completion(struct xsk_socket_info *xsk)
+{
+	struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+	int ret;
+
+	ret = gettimeofday(&tv_now, NULL);
+	if (ret)
+		return TEST_FAILURE;
+	timeradd(&tv_now, &tv_timeout, &tv_end);
+
+	while (xsk->outstanding_tx) {
+		ret = gettimeofday(&tv_now, NULL);
+		if (ret)
+			return TEST_FAILURE;
+		if (timercmp(&tv_now, &tv_end, >)) {
+			ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
+			return TEST_FAILURE;
+		}
+
+		complete_pkts(xsk, xsk->batch_size);
+	}
+
+	return TEST_PASS;
+}
+
+bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
+{
+	return bitmap_full(bitmap, test->nb_sockets);
+}
+
+static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
+{
+	bool timeout = !is_umem_valid(test->ifobj_rx);
+	DECLARE_BITMAP(bitmap, test->nb_sockets);
+	u32 i, ret;
+
+	bitmap_zero(bitmap, test->nb_sockets);
+
+	while (!(all_packets_sent(test, bitmap))) {
+		for (i = 0; i < test->nb_sockets; i++) {
+			struct pkt_stream *pkt_stream;
+
+			pkt_stream = ifobject->xsk_arr[i].pkt_stream;
+			if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
+				__set_bit(i, bitmap);
+				continue;
+			}
+			ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
+			if (ret == TEST_CONTINUE && !test->fail)
+				continue;
+
+			if ((ret || test->fail) && !timeout)
+				return TEST_FAILURE;
+
+			if (ret == TEST_PASS && timeout)
+				return ret;
+
+			ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
+			if (ret)
+				return TEST_FAILURE;
+		}
+	}
+
+	return TEST_PASS;
+}
+
+/* Read the XDP_STATISTICS socket option of @xsk into @stats. */
+static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
+{
+	socklen_t optlen = sizeof(*stats), expected_len = sizeof(struct xdp_statistics);
+	int fd = xsk_socket__fd(xsk), err;
+
+	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
+	if (err) {
+		/* getsockopt() returns -1 and sets errno, so report errno itself */
+		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %d %s\n",
+			       __func__, errno, strerror(errno));
+		return TEST_FAILURE;
+	}
+
+	if (optlen != expected_len) {
+		ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
+			       __func__, expected_len, optlen);
+		return TEST_FAILURE;
+	}
+
+	return TEST_PASS;
+}
+
+static int validate_rx_dropped(struct ifobject *ifobject)
+{
+	struct xsk_socket *xsk = ifobject->xsk->xsk;
+	struct xdp_statistics stats;
+	int err;
+
+	err = kick_rx(ifobject->xsk);
+	if (err)
+		return TEST_FAILURE;
+
+	err = get_xsk_stats(xsk, &stats);
+	if (err)
+		return TEST_FAILURE;
+
+	/* The receiver calls getsockopt after receiving the last (valid)
+	 * packet which is not the final packet sent in this test (valid and
+	 * invalid packets are sent in alternating fashion with the final
+	 * packet being invalid). Since the last packet may or may not have
+	 * been dropped already, both outcomes must be allowed.
+	 */
+	if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
+	    stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
+		return TEST_PASS;
+
+	return TEST_FAILURE;
+}
+
+static int validate_rx_full(struct ifobject *ifobject)
+{
+	struct xsk_socket *xsk = ifobject->xsk->xsk;
+	struct xdp_statistics stats;
+	int err;
+
+	usleep(1000);
+	err = kick_rx(ifobject->xsk);
+	if (err)
+		return TEST_FAILURE;
+
+	err = get_xsk_stats(xsk, &stats);
+	if (err)
+		return TEST_FAILURE;
+
+	if (stats.rx_ring_full)
+		return TEST_PASS;
+
+	return TEST_FAILURE;
+}
+
+static int validate_fill_empty(struct ifobject *ifobject)
+{
+	struct xsk_socket *xsk = ifobject->xsk->xsk;
+	struct xdp_statistics stats;
+	int err;
+
+	usleep(1000);
+	err = kick_rx(ifobject->xsk);
+	if (err)
+		return TEST_FAILURE;
+
+	err = get_xsk_stats(xsk, &stats);
+	if (err)
+		return TEST_FAILURE;
+
+	if (stats.rx_fill_ring_empty_descs)
+		return TEST_PASS;
+
+	return TEST_FAILURE;
+}
+
+/* Check that tx_invalid_descs equals half of the socket's packet stream (nb_pkts / 2). */
+static int validate_tx_invalid_descs(struct ifobject *ifobject)
+{
+	struct xsk_socket *xsk = ifobject->xsk->xsk;
+	int fd = xsk_socket__fd(xsk);
+	struct xdp_statistics stats;
+	socklen_t optlen = sizeof(stats);
+	int err;
+
+	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+	if (err) {
+		/* getsockopt() returns -1 and sets errno, so report errno itself */
+		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %d %s\n",
+			       __func__, errno, strerror(errno));
+		return TEST_FAILURE;
+	}
+
+	if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
+		ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
+			       __func__, (unsigned long long)stats.tx_invalid_descs,
+			       ifobject->xsk->pkt_stream->nb_pkts / 2);
+		return TEST_FAILURE;
+	}
+
+	return TEST_PASS;
+}
+
+static int xsk_configure(struct test_spec *test, struct ifobject *ifobject,
+			  struct xsk_umem_info *umem, bool tx)
+{
+	int i, ret;
+
+	for (i = 0; i < test->nb_sockets; i++) {
+		bool shared = (ifobject->shared_umem && tx) ? true : !!i;
+		u32 ctr = 0;
+
+		while (ctr++ < SOCK_RECONF_CTR) {
+			ret = xsk_configure_socket(&ifobject->xsk_arr[i], umem,
+						     ifobject, shared);
+			if (!ret)
+				break;
+
+			/* Retry if it fails as xsk_socket__create() is asynchronous */
+			if (ctr >= SOCK_RECONF_CTR)
+				return ret;
+			usleep(USLEEP_MAX);
+		}
+		if (ifobject->busy_poll) {
+			ret = enable_busy_poll(&ifobject->xsk_arr[i]);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject)
+{
+	int ret = xsk_configure(test, ifobject, test->ifobj_rx->umem, true);
+
+	if (ret)
+		return ret;
+	ifobject->xsk = &ifobject->xsk_arr[0];
+	ifobject->xskmap = test->ifobj_rx->xskmap;
+	memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
+	ifobject->umem->base_addr = 0;
+
+	return 0;
+}
+
+static int xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
+				   bool fill_up)
+{
+	u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
+	u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
+	int ret;
+
+	if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+		buffers_to_fill = umem->num_frames;
+	else
+		buffers_to_fill = umem->fill_size;
+
+	ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
+	if (ret != buffers_to_fill)
+		return -ENOSPC;
+
+	while (filled < buffers_to_fill) {
+		struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
+		u64 addr;
+		u32 i;
+
+		for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
+			if (!pkt) {
+				if (!fill_up)
+					break;
+				addr = filled * umem->frame_size + umem->base_addr;
+			} else if (pkt->offset >= 0) {
+				addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
+			} else {
+				addr = pkt->offset + umem_alloc_buffer(umem);
+			}
+
+			*xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+			if (++filled >= buffers_to_fill)
+				break;
+		}
+	}
+	xsk_ring_prod__submit(&umem->fq, filled);
+	xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
+
+	pkt_stream_reset(pkt_stream);
+	umem_reset_alloc(umem);
+
+	return 0;
+}
+
+static int thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
+{
+	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+	int mmap_flags;
+	u64 umem_sz;
+	void *bufs;
+	int ret;
+	u32 i;
+
+	umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
+	mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+
+	if (ifobject->umem->unaligned_mode)
+		mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
+
+	if (ifobject->shared_umem)
+		umem_sz *= 2;
+
+	bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+	if (bufs == MAP_FAILED)
+		return -errno;
+
+	ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
+	if (ret)
+		return ret;
+
+	ret = xsk_configure(test, ifobject, ifobject->umem, false);
+	if (ret)
+		return ret;
+
+	ifobject->xsk = &ifobject->xsk_arr[0];
+
+	if (!ifobject->rx_on)
+		return 0;
+
+	ret = xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream,
+				     ifobject->use_fill_ring);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < test->nb_sockets; i++) {
+		ifobject->xsk = &ifobject->xsk_arr[i];
+		ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+void *worker_testapp_validate_tx(void *arg)
+{
+	struct test_spec *test = (struct test_spec *)arg;
+	struct ifobject *ifobject = test->ifobj_tx;
+	int err;
+
+	if (test->current_step == 1) {
+		if (!ifobject->shared_umem) {
+			if (thread_common_ops(test, ifobject)) {
+				test->fail = true;
+				pthread_exit(NULL);
+			}
+		} else {
+			if (thread_common_ops_tx(test, ifobject)) {
+				test->fail = true;
+				pthread_exit(NULL);
+			}
+		}
+	}
+
+	err = send_pkts(test, ifobject);
+
+	if (!err && ifobject->validation_func)
+		err = ifobject->validation_func(ifobject);
+	if (err)
+		test->fail = true;
+
+	pthread_exit(NULL);
+}
+
+void *worker_testapp_validate_rx(void *arg)
+{
+	struct test_spec *test = (struct test_spec *)arg;
+	struct ifobject *ifobject = test->ifobj_rx;
+	int err;
+
+	if (test->current_step == 1) {
+		err = thread_common_ops(test, ifobject);
+	} else {
+		xsk_clear_xskmap(ifobject->xskmap);
+		err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
+		if (err)
+			ksft_print_msg("Error: Failed to update xskmap, error %s\n",
+				       strerror(-err));
+	}
+
+	pthread_barrier_wait(&barr);
+
+	/* We leave only now in case of error to avoid getting stuck in the barrier */
+	if (err) {
+		test->fail = true;
+		pthread_exit(NULL);
+	}
+
+	err = receive_pkts(test);
+
+	if (!err && ifobject->validation_func)
+		err = ifobject->validation_func(ifobject);
+
+	if (err) {
+		if (!test->adjust_tail) {
+			test->fail = true;
+		} else {
+			bool supported;
+
+			if (is_adjust_tail_supported(ifobject->xdp_progs, &supported))
+				test->fail = true;
+			else if (!supported)
+				test->adjust_tail_support = false;
+			else
+				test->fail = true;
+		}
+	}
+
+	pthread_exit(NULL);
+}
+
+static void testapp_clean_xsk_umem(struct ifobject *ifobj)
+{
+	u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size;
+
+	if (ifobj->shared_umem)
+		umem_sz *= 2;
+
+	umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+	xsk_umem__delete(ifobj->umem->umem);
+	munmap(ifobj->umem->buffer, umem_sz);
+}
+
+static void handler(int signum)
+{
+	pthread_exit(NULL);
+}
+
+static bool xdp_prog_changed_rx(struct test_spec *test)
+{
+	struct ifobject *ifobj = test->ifobj_rx;
+
+	return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
+}
+
+static bool xdp_prog_changed_tx(struct test_spec *test)
+{
+	struct ifobject *ifobj = test->ifobj_tx;
+
+	return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
+}
+
+static int xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
+			     struct bpf_map *xskmap, enum test_mode mode)
+{
+	int err;
+
+	xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
+	err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
+	if (err) {
+		ksft_print_msg("Error attaching XDP program\n");
+		return err;
+	}
+
+	if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC))
+		if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) {
+			ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
+			return -EINVAL;
+		}
+
+	ifobj->xdp_prog = xdp_prog;
+	ifobj->xskmap = xskmap;
+	ifobj->mode = mode;
+
+	return 0;
+}
+
+static int xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
+				 struct ifobject *ifobj_tx)
+{
+	int err = 0;
+
+	if (xdp_prog_changed_rx(test)) {
+		err = xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
+		if (err)
+			return err;
+	}
+
+	if (!ifobj_tx || ifobj_tx->shared_umem)
+		return 0;
+
+	if (xdp_prog_changed_tx(test))
+		err = xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
+
+	return err;
+}
+
+static void clean_sockets(struct test_spec *test, struct ifobject *ifobj)
+{
+	u32 i;
+
+	if (!ifobj || !test)
+		return;
+
+	for (i = 0; i < test->nb_sockets; i++)
+		xsk_socket__delete(ifobj->xsk_arr[i].xsk);
+}
+
+static void clean_umem(struct test_spec *test, struct ifobject *ifobj1, struct ifobject *ifobj2)
+{
+	if (!ifobj1)
+		return;
+
+	testapp_clean_xsk_umem(ifobj1);
+	if (ifobj2 && !ifobj2->shared_umem)
+		testapp_clean_xsk_umem(ifobj2);
+}
+
+static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
+				      struct ifobject *ifobj2)
+{
+	pthread_t t0, t1;
+	int err;
+
+	if (test->mtu > MAX_ETH_PKT_SIZE) {
+		if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
+						   (ifobj2 && !ifobj2->multi_buff_zc_supp))) {
+			ksft_print_msg("Multi buffer for zero-copy not supported.\n");
+			return TEST_SKIP;
+		}
+		if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
+						   (ifobj2 && !ifobj2->multi_buff_supp))) {
+			ksft_print_msg("Multi buffer not supported.\n");
+			return TEST_SKIP;
+		}
+	}
+	err = test_spec_set_mtu(test, test->mtu);
+	if (err) {
+		ksft_print_msg("Error, could not set mtu.\n");
+		return TEST_FAILURE;
+	}
+
+	if (ifobj2) {
+		if (pthread_barrier_init(&barr, NULL, 2))
+			return TEST_FAILURE;
+		pkt_stream_reset(ifobj2->xsk->pkt_stream);
+	}
+
+	test->current_step++;
+	pkt_stream_reset(ifobj1->xsk->pkt_stream);
+	pkts_in_flight = 0;
+
+	signal(SIGUSR1, handler);
+	/* Spawn the first worker: RX, or the only thread when ifobj2 is NULL */
+	pthread_create(&t0, NULL, ifobj1->func_ptr, test);
+
+	if (ifobj2) {
+		pthread_barrier_wait(&barr);
+		if (pthread_barrier_destroy(&barr)) {
+			pthread_kill(t0, SIGUSR1);
+			clean_sockets(test, ifobj1);
+			clean_umem(test, ifobj1, NULL);
+			return TEST_FAILURE;
+		}
+
+		/* Spawn TX thread */
+		pthread_create(&t1, NULL, ifobj2->func_ptr, test);
+
+		pthread_join(t1, NULL);
+	}
+
+	if (!ifobj2)
+		pthread_kill(t0, SIGUSR1); /* handler() makes the lone worker pthread_exit() */
+	else
+		pthread_join(t0, NULL);
+
+	if (test->total_steps == test->current_step || test->fail) {
+		clean_sockets(test, ifobj1);
+		clean_sockets(test, ifobj2);
+		clean_umem(test, ifobj1, ifobj2);
+	}
+
+	if (test->fail)
+		return TEST_FAILURE;
+
+	return TEST_PASS;
+}
+
+static int testapp_validate_traffic(struct test_spec *test)
+{
+	struct ifobject *ifobj_rx = test->ifobj_rx;
+	struct ifobject *ifobj_tx = test->ifobj_tx;
+
+	if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
+	    (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
+		ksft_print_msg("No huge pages present.\n");
+		return TEST_SKIP;
+	}
+
+	if (test->set_ring) {
+		if (ifobj_tx->hw_ring_size_supp) {
+			if (set_ring_size(ifobj_tx)) {
+				ksft_print_msg("Failed to change HW ring size.\n");
+				return TEST_FAILURE;
+			}
+		} else {
+			ksft_print_msg("Changing HW ring size not supported.\n");
+			return TEST_SKIP;
+		}
+	}
+
+	if (xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx))
+		return TEST_FAILURE;
+	return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
+}
+
+static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
+{
+	return __testapp_validate_traffic(test, ifobj, NULL);
+}
+
+int testapp_teardown(struct test_spec *test)
+{
+	int i;
+
+	for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
+		if (testapp_validate_traffic(test))
+			return TEST_FAILURE;
+		test_spec_reset(test);
+	}
+
+	return TEST_PASS;
+}
+
+static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
+{
+	thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
+	struct ifobject *tmp_ifobj = (*ifobj1);
+
+	(*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
+	(*ifobj2)->func_ptr = tmp_func_ptr; /* the two objects trade worker functions... */
+
+	*ifobj1 = *ifobj2;
+	*ifobj2 = tmp_ifobj; /* ...then trade slots, so each slot keeps its worker function */
+}
+
+int testapp_bidirectional(struct test_spec *test)
+{
+	int res;
+
+	test->ifobj_tx->rx_on = true;
+	test->ifobj_rx->tx_on = true;
+	test->total_steps = 2;
+	if (testapp_validate_traffic(test))
+		return TEST_FAILURE;
+
+	print_verbose("Switching Tx/Rx direction\n");
+	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+	res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
+
+	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+	return res;
+}
+
+static int swap_xsk_resources(struct test_spec *test)
+{
+	int ret;
+
+	test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
+	test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
+	test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
+	test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
+	test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
+	test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
+
+	ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
+	if (ret)
+		return TEST_FAILURE;
+
+	return TEST_PASS;
+}
+
+int testapp_xdp_prog_cleanup(struct test_spec *test)
+{
+	test->total_steps = 2;
+	test->nb_sockets = 2;
+	if (testapp_validate_traffic(test))
+		return TEST_FAILURE;
+
+	if (swap_xsk_resources(test)) {
+		clean_sockets(test, test->ifobj_rx);
+		clean_sockets(test, test->ifobj_tx);
+		clean_umem(test, test->ifobj_rx, test->ifobj_tx);
+		return TEST_FAILURE;
+	}
+
+	return testapp_validate_traffic(test);
+}
+
+int testapp_headroom(struct test_spec *test)
+{
+	test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_stats_rx_dropped(struct test_spec *test)
+{
+	if (test->mode == TEST_MODE_ZC) {
+		ksft_print_msg("Can not run RX_DROPPED test for ZC mode\n");
+		return TEST_SKIP;
+	}
+
+	if (pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0))
+		return TEST_FAILURE;
+	test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
+		XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
+	if (pkt_stream_receive_half(test))
+		return TEST_FAILURE;
+	test->ifobj_rx->validation_func = validate_rx_dropped;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_stats_tx_invalid_descs(struct test_spec *test)
+{
+	if (pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0))
+		return TEST_FAILURE;
+	test->ifobj_tx->validation_func = validate_tx_invalid_descs;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_stats_rx_full(struct test_spec *test)
+{
+	if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE))
+		return TEST_FAILURE;
+	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+	test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
+	test->ifobj_rx->release_rx = false;
+	test->ifobj_rx->validation_func = validate_rx_full;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_stats_fill_empty(struct test_spec *test)
+{
+	if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE))
+		return TEST_FAILURE;
+	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+	test->ifobj_rx->use_fill_ring = false;
+	test->ifobj_rx->validation_func = validate_fill_empty;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_unaligned(struct test_spec *test)
+{
+	test->ifobj_tx->umem->unaligned_mode = true;
+	test->ifobj_rx->umem->unaligned_mode = true;
+	/* Let half of the packets straddle a 4K buffer boundary */
+	if (pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2))
+		return TEST_FAILURE;
+
+	return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_unaligned_mb(struct test_spec *test)
+{
+	test->mtu = MAX_ETH_JUMBO_SIZE;
+	test->ifobj_tx->umem->unaligned_mode = true;
+	test->ifobj_rx->umem->unaligned_mode = true;
+	if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE))
+		return TEST_FAILURE;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_single_pkt(struct test_spec *test)
+{
+	struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
+
+	if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+		return TEST_FAILURE;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_mb(struct test_spec *test)
+{
+	test->mtu = MAX_ETH_JUMBO_SIZE;
+	if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE))
+		return TEST_FAILURE;
+
+	return testapp_validate_traffic(test);
+}
+
+int testapp_invalid_desc_mb(struct test_spec *test)
+{
+	struct xsk_umem_info *umem = test->ifobj_tx->umem;
+	u64 umem_size = umem->num_frames * umem->frame_size;
+	struct pkt pkts[] = {
+		/* Valid packet for synch to start with */
+		{0, MIN_PKT_SIZE, 0, true, 0},
+		/* Zero frame len is not legal */
+		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+		{0, 0, 0, false, 0},
+		/* Invalid address in the second frame */
+		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+		{umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+		/* Invalid len in the middle */
+		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+		{0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+		/* Invalid options in the middle */
+		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
+		/* Transmit 2 frags, receive 3 */
+		{0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
+		{0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
+		/* Middle frame crosses chunk boundary with small length */
+		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+		{-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
+		/* Valid packet for synch so that something is received */
+		{0, MIN_PKT_SIZE, 0, true, 0}};
+
+	if (umem->unaligned_mode) {
+		/* Crossing a chunk boundary allowed */
+		pkts[12].valid = true;
+		pkts[13].valid = true;
+	}
+
+	test->mtu = MAX_ETH_JUMBO_SIZE;
+	if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+		return TEST_FAILURE;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_invalid_desc(struct test_spec *test)
+{
+	struct xsk_umem_info *umem = test->ifobj_tx->umem;
+	u64 umem_size = umem->num_frames * umem->frame_size;
+	struct pkt pkts[] = {
+		/* Zero packet address allowed */
+		{0, MIN_PKT_SIZE, 0, true},
+		/* Allowed packet */
+		{0, MIN_PKT_SIZE, 0, true},
+		/* Straddling the start of umem */
+		{-2, MIN_PKT_SIZE, 0, false},
+		/* Packet too large */
+		{0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+		/* Up to end of umem allowed */
+		{umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
+		/* After umem ends */
+		{umem_size, MIN_PKT_SIZE, 0, false},
+		/* Straddle the end of umem */
+		{umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+		/* Straddle a 4K boundary */
+		{0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+		/* Straddle a 2K boundary */
+		{0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
+		/* Valid packet for synch so that something is received */
+		{0, MIN_PKT_SIZE, 0, true}};
+
+	if (umem->unaligned_mode) {
+		/* Crossing a page boundary allowed */
+		pkts[7].valid = true;
+	}
+	if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
+		/* Crossing a 2K frame size boundary not allowed */
+		pkts[8].valid = false;
+	}
+
+	if (test->ifobj_tx->shared_umem) {
+		pkts[4].offset += umem_size;
+		pkts[5].offset += umem_size;
+		pkts[6].offset += umem_size;
+	}
+
+	if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+		return TEST_FAILURE;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_drop(struct test_spec *test)
+{
+	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
+			       skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+	if (pkt_stream_receive_half(test))
+		return TEST_FAILURE;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_metadata_copy(struct test_spec *test)
+{
+	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
+			       skel_tx->progs.xsk_xdp_populate_metadata,
+			       skel_rx->maps.xsk, skel_tx->maps.xsk);
+	test->ifobj_rx->use_metadata = true;
+
+	skel_rx->bss->count = 0;
+
+	return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_shared_umem(struct test_spec *test)
+{
+	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+	int ret;
+
+	test->total_steps = 1;
+	test->nb_sockets = 2;
+
+	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
+			       skel_tx->progs.xsk_xdp_shared_umem,
+			       skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+	if (pkt_stream_even_odd_sequence(test))
+		return TEST_FAILURE;
+
+	ret = testapp_validate_traffic(test);
+
+	release_even_odd_sequence(test);
+
+	return ret;
+}
+
+int testapp_poll_txq_tmout(struct test_spec *test)
+{
+	test->ifobj_tx->use_poll = true;
+	/* Make frames invalid: umem frame_size and packet length are both set to 2048 */
+	test->ifobj_tx->umem->frame_size = 2048;
+	if (pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048))
+		return TEST_FAILURE;
+	return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
+}
+
+int testapp_poll_rxq_tmout(struct test_spec *test)
+{
+	test->ifobj_rx->use_poll = true;
+	return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
+}
+
+int testapp_too_many_frags(struct test_spec *test)
+{
+	struct pkt *pkts;
+	u32 max_frags, i;
+	int ret = TEST_FAILURE;
+
+	if (test->mode == TEST_MODE_ZC) {
+		max_frags = test->ifobj_tx->xdp_zc_max_segs;
+	} else {
+		max_frags = get_max_skb_frags();
+		if (!max_frags) {
+			ksft_print_msg("Can't get MAX_SKB_FRAGS from system, using default (17)\n");
+			max_frags = 17;
+		}
+		max_frags += 1;
+	}
+
+	pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
+	if (!pkts)
+		return TEST_FAILURE;
+
+	test->mtu = MAX_ETH_JUMBO_SIZE;
+
+	/* Valid packet for synch */
+	pkts[0].len = MIN_PKT_SIZE;
+	pkts[0].valid = true;
+
+	/* One valid packet with the max amount of frags */
+	for (i = 1; i < max_frags + 1; i++) {
+		pkts[i].len = MIN_PKT_SIZE;
+		pkts[i].options = XDP_PKT_CONTD;
+		pkts[i].valid = true;
+	}
+	pkts[max_frags].options = 0;
+
+	/* An invalid packet with the max amount of frags but signals packet
+	 * continues on the last frag
+	 */
+	for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
+		pkts[i].len = MIN_PKT_SIZE;
+		pkts[i].options = XDP_PKT_CONTD;
+		pkts[i].valid = false;
+	}
+
+	/* Valid packet for synch */
+	pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
+	pkts[2 * max_frags + 1].valid = true;
+
+	if (pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2)) {
+		free(pkts);
+		return TEST_FAILURE;
+	}
+
+	ret = testapp_validate_traffic(test);
+	free(pkts);
+	return ret;
+}
+
+static int xsk_load_xdp_programs(struct ifobject *ifobj)
+{
+	ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
+	if (libbpf_get_error(ifobj->xdp_progs))
+		return libbpf_get_error(ifobj->xdp_progs);
+
+	return 0;
+}
+
+/* Probe for 2MB huge pages with the same mmap() flags the unaligned-mode UMEM uses. */
+static bool hugepages_present(void)
+{
+	size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
+	void *bufs;
+
+	bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
+	if (bufs == MAP_FAILED)
+		return false;
+
+	mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+	munmap(bufs, mmap_sz);
+	return true;
+}
+
+int init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
+{
+	LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+	int err;
+
+	ifobj->func_ptr = func_ptr;
+
+	err = xsk_load_xdp_programs(ifobj);
+	if (err) {
+		ksft_print_msg("Error loading XDP program\n");
+		return err;
+	}
+
+	if (hugepages_present())
+		ifobj->unaligned_supp = true;
+
+	err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
+	if (err) {
+		ksft_print_msg("Error querying XDP capabilities\n");
+		return err;
+	}
+	if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
+		ifobj->multi_buff_supp = true;
+	if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
+		if (query_opts.xdp_zc_max_segs > 1) {
+			ifobj->multi_buff_zc_supp = true;
+			ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
+		} else {
+			ifobj->xdp_zc_max_segs = 0;
+		}
+	}
+
+	return 0;
+}
+
+int testapp_send_receive(struct test_spec *test)
+{
+	return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_2k_frame(struct test_spec *test)
+{
+	test->ifobj_tx->umem->frame_size = 2048;
+	test->ifobj_rx->umem->frame_size = 2048;
+	if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE))
+		return TEST_FAILURE;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_poll_rx(struct test_spec *test)
+{
+	test->ifobj_rx->use_poll = true;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_poll_tx(struct test_spec *test)
+{
+	test->ifobj_tx->use_poll = true;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_aligned_inv_desc(struct test_spec *test)
+{
+	return testapp_invalid_desc(test);
+}
+
+int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
+{
+	test->ifobj_tx->umem->frame_size = 2048;
+	test->ifobj_rx->umem->frame_size = 2048;
+	return testapp_invalid_desc(test);
+}
+
+int testapp_unaligned_inv_desc(struct test_spec *test)
+{
+	test->ifobj_tx->umem->unaligned_mode = true;
+	test->ifobj_rx->umem->unaligned_mode = true;
+	return testapp_invalid_desc(test);
+}
+
+int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
+{
+	u64 page_size, umem_size;
+
+	/* Odd frame size so the UMEM doesn't end near a page boundary. */
+	test->ifobj_tx->umem->frame_size = 4001;
+	test->ifobj_rx->umem->frame_size = 4001;
+	test->ifobj_tx->umem->unaligned_mode = true;
+	test->ifobj_rx->umem->unaligned_mode = true;
+	/* This test exists to test descriptors that straddle the end of
+	 * the UMEM but not a page.
+	 */
+	page_size = sysconf(_SC_PAGESIZE);
+	umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
+	assert(umem_size % page_size > MIN_PKT_SIZE);
+	assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
+
+	return testapp_invalid_desc(test);
+}
+
+int testapp_aligned_inv_desc_mb(struct test_spec *test)
+{
+	return testapp_invalid_desc_mb(test);
+}
+
+int testapp_unaligned_inv_desc_mb(struct test_spec *test)
+{
+	test->ifobj_tx->umem->unaligned_mode = true;
+	test->ifobj_rx->umem->unaligned_mode = true;
+	return testapp_invalid_desc_mb(test);
+}
+
+int testapp_xdp_metadata(struct test_spec *test)
+{
+	return testapp_xdp_metadata_copy(test);
+}
+
+int testapp_xdp_metadata_mb(struct test_spec *test)
+{
+	test->mtu = MAX_ETH_JUMBO_SIZE;
+	return testapp_xdp_metadata_copy(test);
+}
+
+int testapp_hw_sw_min_ring_size(struct test_spec *test)
+{
+	int ret;
+
+	test->set_ring = true;
+	test->total_steps = 2;
+	test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
+	test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
+	test->ifobj_tx->xsk->batch_size = 1;
+	test->ifobj_rx->xsk->batch_size = 1;
+	ret = testapp_validate_traffic(test);
+	if (ret)
+		return ret;
+
+	/* Set batch size to hw_ring_size - 1 */
+	test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+	test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+	return testapp_validate_traffic(test);
+}
+
+int testapp_hw_sw_max_ring_size(struct test_spec *test)
+{
+	u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
+	int ret;
+
+	test->set_ring = true;
+	test->total_steps = 2;
+	test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
+	test->ifobj_tx->ring.rx_pending  = test->ifobj_tx->ring.rx_max_pending;
+	test->ifobj_rx->umem->num_frames = max_descs;
+	test->ifobj_rx->umem->fill_size = max_descs;
+	test->ifobj_rx->umem->comp_size = max_descs;
+	test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+	test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+	ret = testapp_validate_traffic(test);
+	if (ret)
+		return ret;
+
+	/* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
+	 * updating the Rx HW tail register.
+	 */
+	test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+	test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+	if (pkt_stream_replace(test, max_descs, MIN_PKT_SIZE)) {
+		clean_sockets(test, test->ifobj_tx);
+		clean_sockets(test, test->ifobj_rx);
+		clean_umem(test, test->ifobj_rx, test->ifobj_tx);
+		return TEST_FAILURE;
+	}
+
+	return testapp_validate_traffic(test);
+}
+
+static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
+{
+	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail,
+			       skel_tx->progs.xsk_xdp_adjust_tail,
+			       skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+	skel_rx->bss->adjust_value = adjust_value;
+
+	return testapp_validate_traffic(test);
+}
+
+static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
+{
+	int ret;
+
+	test->adjust_tail_support = true;
+	test->adjust_tail = true;
+	test->total_steps = 1;
+
+	ret = pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
+	if (ret)
+		return TEST_FAILURE;
+
+	ret = pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
+	if (ret)
+		return TEST_FAILURE;
+
+	ret = testapp_xdp_adjust_tail(test, value);
+	if (ret)
+		return ret;
+
+	if (!test->adjust_tail_support) {
+		ksft_print_msg("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
+				      mode_string(test), busy_poll_string(test));
+		return TEST_SKIP;
+	}
+
+	return 0;
+}
+
+int testapp_adjust_tail_shrink(struct test_spec *test)
+{
+	/* Shrink by 4 bytes for testing purpose */
+	return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
+}
+
+int testapp_adjust_tail_shrink_mb(struct test_spec *test)
+{
+	test->mtu = MAX_ETH_JUMBO_SIZE;
+	/* Shrink by the frag size */
+	return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+int testapp_adjust_tail_grow(struct test_spec *test)
+{
+	/* Grow by 4 bytes for testing purpose */
+	return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
+}
+
+int testapp_adjust_tail_grow_mb(struct test_spec *test)
+{
+	test->mtu = MAX_ETH_JUMBO_SIZE;
+	/* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */
+	return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
+				   XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+int testapp_tx_queue_consumer(struct test_spec *test)
+{
+	int nr_packets;
+
+	if (test->mode == TEST_MODE_ZC) {
+		ksft_print_msg("Can not run TX_QUEUE_CONSUMER test for ZC mode\n");
+		return TEST_SKIP;
+	}
+
+	nr_packets = MAX_TX_BUDGET_DEFAULT + 1;
+	if (pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE))
+		return TEST_FAILURE;
+	test->ifobj_tx->xsk->batch_size = nr_packets;
+	test->ifobj_tx->xsk->check_consumer = true;
+
+	return testapp_validate_traffic(test);
+}
+
+struct ifobject *ifobject_create(void)
+{
+	struct ifobject *ifobj;
+
+	ifobj = calloc(1, sizeof(*ifobj));	/* sized from the pointer, like the allocations below */
+	if (!ifobj)
+		return NULL;
+
+	ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
+	if (!ifobj->xsk_arr)
+		goto out_xsk_arr;
+
+	ifobj->umem = calloc(1, sizeof(*ifobj->umem));
+	if (!ifobj->umem)
+		goto out_umem;
+
+	return ifobj;
+
+out_umem:
+	free(ifobj->xsk_arr);
+out_xsk_arr:
+	free(ifobj);
+	return NULL;
+}
+
+void ifobject_delete(struct ifobject *ifobj)
+{
+	free(ifobj->umem);
+	free(ifobj->xsk_arr);
+	free(ifobj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.h b/tools/testing/selftests/bpf/prog_tests/test_xsk.h
new file mode 100644
index 000000000000..8fc78a057de0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TEST_XSK_H_
+#define TEST_XSK_H_
+
+#include <linux/ethtool.h>
+#include <linux/if_xdp.h>
+
+#include "../kselftest.h"
+#include "xsk.h"
+
+#ifndef SO_PREFER_BUSY_POLL
+#define SO_PREFER_BUSY_POLL 69
+#endif
+
+#ifndef SO_BUSY_POLL_BUDGET
+#define SO_BUSY_POLL_BUDGET 70
+#endif
+
+#define TEST_PASS 0
+#define TEST_FAILURE -1
+#define TEST_CONTINUE 1
+#define TEST_SKIP 2
+
+#define DEFAULT_PKT_CNT			(4 * 1024)
+#define DEFAULT_UMEM_BUFFERS		(DEFAULT_PKT_CNT / 4)
+#define HUGEPAGE_SIZE			(2 * 1024 * 1024)
+#define MIN_PKT_SIZE			64
+#define MAX_ETH_PKT_SIZE		1518
+#define MAX_INTERFACE_NAME_CHARS	16
+#define MAX_TEST_NAME_SIZE		48
+#define SOCK_RECONF_CTR			10
+#define USLEEP_MAX			10000
+
+extern bool opt_verbose;
+#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
+
+
+static inline u32 ceil_u32(u32 a, u32 b)
+{
+	return a / b + (a % b != 0);	/* ceil(a / b); avoids the wrap of a + b - 1 */
+}
+
+static inline u64 ceil_u64(u64 a, u64 b)
+{
+	return a / b + (a % b != 0);	/* ceil(a / b); avoids the wrap of a + b - 1 */
+}
+
+/* Simple test */
+enum test_mode {
+	TEST_MODE_SKB,
+	TEST_MODE_DRV,
+	TEST_MODE_ZC,
+	TEST_MODE_ALL
+};
+
+struct ifobject;
+struct test_spec;
+typedef int (*validation_func_t)(struct ifobject *ifobj);
+typedef void *(*thread_func_t)(void *arg);
+typedef int (*test_func_t)(struct test_spec *test);
+
+struct xsk_socket_info {
+	struct xsk_ring_cons rx;
+	struct xsk_ring_prod tx;
+	struct xsk_umem_info *umem;
+	struct xsk_socket *xsk;
+	struct pkt_stream *pkt_stream;
+	u32 outstanding_tx;
+	u32 rxqsize;
+	u32 batch_size;
+	u8 dst_mac[ETH_ALEN];
+	u8 src_mac[ETH_ALEN];
+	bool check_consumer;
+};
+
+int kick_rx(struct xsk_socket_info *xsk);
+int kick_tx(struct xsk_socket_info *xsk);
+
+struct xsk_umem_info {
+	struct xsk_ring_prod fq;
+	struct xsk_ring_cons cq;
+	struct xsk_umem *umem;
+	u64 next_buffer;
+	u32 num_frames;
+	u32 frame_headroom;
+	void *buffer;
+	u32 frame_size;
+	u32 base_addr;
+	u32 fill_size;
+	u32 comp_size;
+	bool unaligned_mode;
+};
+
+struct set_hw_ring {
+	u32 default_tx;
+	u32 default_rx;
+};
+
+int hw_ring_size_reset(struct ifobject *ifobj);
+
+struct ifobject {
+	char ifname[MAX_INTERFACE_NAME_CHARS];
+	struct xsk_socket_info *xsk;
+	struct xsk_socket_info *xsk_arr;
+	struct xsk_umem_info *umem;
+	thread_func_t func_ptr;
+	validation_func_t validation_func;
+	struct xsk_xdp_progs *xdp_progs;
+	struct bpf_map *xskmap;
+	struct bpf_program *xdp_prog;
+	struct ethtool_ringparam ring;
+	struct set_hw_ring set_ring;
+	enum test_mode mode;
+	int ifindex;
+	int mtu;
+	u32 bind_flags;
+	u32 xdp_zc_max_segs;
+	bool tx_on;
+	bool rx_on;
+	bool use_poll;
+	bool busy_poll;
+	bool use_fill_ring;
+	bool release_rx;
+	bool shared_umem;
+	bool use_metadata;
+	bool unaligned_supp;
+	bool multi_buff_supp;
+	bool multi_buff_zc_supp;
+	bool hw_ring_size_supp;
+};
+struct ifobject *ifobject_create(void);
+void ifobject_delete(struct ifobject *ifobj);
+int init_iface(struct ifobject *ifobj, thread_func_t func_ptr);
+
+int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, u64 size);
+int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+			 struct ifobject *ifobject, bool shared);
+
+
+struct pkt {
+	int offset;
+	u32 len;
+	u32 pkt_nb;
+	bool valid;
+	u16 options;
+};
+
+struct pkt_stream {
+	u32 nb_pkts;
+	u32 current_pkt_nb;
+	struct pkt *pkts;
+	u32 max_pkt_len;
+	u32 nb_rx_pkts;
+	u32 nb_valid_entries;
+	bool verbatim;
+};
+
+static inline bool pkt_continues(u32 options)
+{
+	return options & XDP_PKT_CONTD;
+}
+
+struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len);
+void pkt_stream_delete(struct pkt_stream *pkt_stream);
+void pkt_stream_reset(struct pkt_stream *pkt_stream);
+void pkt_stream_restore_default(struct test_spec *test);
+
+struct test_spec {
+	struct ifobject *ifobj_tx;
+	struct ifobject *ifobj_rx;
+	struct pkt_stream *tx_pkt_stream_default;
+	struct pkt_stream *rx_pkt_stream_default;
+	struct bpf_program *xdp_prog_rx;
+	struct bpf_program *xdp_prog_tx;
+	struct bpf_map *xskmap_rx;
+	struct bpf_map *xskmap_tx;
+	test_func_t test_func;
+	int mtu;
+	u16 total_steps;
+	u16 current_step;
+	u16 nb_sockets;
+	bool fail;
+	bool set_ring;
+	bool adjust_tail;
+	bool adjust_tail_support;
+	enum test_mode mode;
+	char name[MAX_TEST_NAME_SIZE];
+};
+
+#define busy_poll_string(test) ((test)->ifobj_tx->busy_poll ? "BUSY-POLL " : "")
+static inline char *mode_string(struct test_spec *test)
+{
+	switch (test->mode) {
+	case TEST_MODE_SKB:
+		return "SKB";
+	case TEST_MODE_DRV:
+		return "DRV";
+	case TEST_MODE_ZC:
+		return "ZC";
+	default:
+		return "BOGUS";
+	}
+}
+
+void test_init(struct test_spec *test, struct ifobject *ifobj_tx,
+	       struct ifobject *ifobj_rx, enum test_mode mode,
+	       const struct test_spec *test_to_run);
+
+int testapp_adjust_tail_grow(struct test_spec *test);
+int testapp_adjust_tail_grow_mb(struct test_spec *test);
+int testapp_adjust_tail_shrink(struct test_spec *test);
+int testapp_adjust_tail_shrink_mb(struct test_spec *test);
+int testapp_aligned_inv_desc(struct test_spec *test);
+int testapp_aligned_inv_desc_2k_frame(struct test_spec *test);
+int testapp_aligned_inv_desc_mb(struct test_spec *test);
+int testapp_bidirectional(struct test_spec *test);
+int testapp_headroom(struct test_spec *test);
+int testapp_hw_sw_max_ring_size(struct test_spec *test);
+int testapp_hw_sw_min_ring_size(struct test_spec *test);
+int testapp_poll_rx(struct test_spec *test);
+int testapp_poll_rxq_tmout(struct test_spec *test);
+int testapp_poll_tx(struct test_spec *test);
+int testapp_poll_txq_tmout(struct test_spec *test);
+int testapp_send_receive(struct test_spec *test);
+int testapp_send_receive_2k_frame(struct test_spec *test);
+int testapp_send_receive_mb(struct test_spec *test);
+int testapp_send_receive_unaligned(struct test_spec *test);
+int testapp_send_receive_unaligned_mb(struct test_spec *test);
+int testapp_single_pkt(struct test_spec *test);
+int testapp_stats_fill_empty(struct test_spec *test);
+int testapp_stats_rx_dropped(struct test_spec *test);
+int testapp_stats_tx_invalid_descs(struct test_spec *test);
+int testapp_stats_rx_full(struct test_spec *test);
+int testapp_teardown(struct test_spec *test);
+int testapp_too_many_frags(struct test_spec *test);
+int testapp_tx_queue_consumer(struct test_spec *test);
+int testapp_unaligned_inv_desc(struct test_spec *test);
+int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test);
+int testapp_unaligned_inv_desc_mb(struct test_spec *test);
+int testapp_xdp_drop(struct test_spec *test);
+int testapp_xdp_metadata(struct test_spec *test);
+int testapp_xdp_metadata_mb(struct test_spec *test);
+int testapp_xdp_prog_cleanup(struct test_spec *test);
+int testapp_xdp_shared_umem(struct test_spec *test);
+
+void *worker_testapp_validate_rx(void *arg);
+void *worker_testapp_validate_tx(void *arg);
+
+static const struct test_spec tests[] = {
+	{.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
+	{.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
+	{.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
+	{.name = "POLL_RX", .test_func = testapp_poll_rx},
+	{.name = "POLL_TX", .test_func = testapp_poll_tx},
+	{.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
+	{.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
+	{.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
+	{.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
+	{.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
+	{.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
+	{.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
+	{.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
+	{.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
+	{.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
+	{.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
+	{.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
+	{.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
+	{.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
+	{.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
+	{.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
+	{.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
+	{.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
+	{.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer},
+	};
+
+static const struct test_spec ci_skip_tests[] = {
+	/* Flaky tests */
+	{.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
+	{.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
+	{.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
+	{.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
+	/* Tests with huge page dependency */
+	{.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
+	{.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
+	{.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
+	 .test_func = testapp_unaligned_inv_desc_4001_frame},
+	{.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
+	 .test_func = testapp_send_receive_unaligned_mb},
+	{.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
+	/* Test with HW ring size dependency */
+	{.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
+	{.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
+	/* Too long test */
+	{.name = "TEARDOWN", .test_func = testapp_teardown},
+};
+
+
+#endif				/* TEST_XSK_H_ */
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 28e81161e6fc..4b4b081b46cc 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -7,6 +7,7 @@
 #include "verifier_arena.skel.h"
 #include "verifier_arena_large.skel.h"
 #include "verifier_array_access.skel.h"
+#include "verifier_async_cb_context.skel.h"
 #include "verifier_basic_stack.skel.h"
 #include "verifier_bitfield_write.skel.h"
 #include "verifier_bounds.skel.h"
@@ -34,6 +35,7 @@
 #include "verifier_global_subprogs.skel.h"
 #include "verifier_global_ptr_args.skel.h"
 #include "verifier_gotol.skel.h"
+#include "verifier_gotox.skel.h"
 #include "verifier_helper_access_var_len.skel.h"
 #include "verifier_helper_packet_access.skel.h"
 #include "verifier_helper_restricted.skel.h"
@@ -172,6 +174,7 @@ void test_verifier_div_overflow(void)         { RUN(verifier_div_overflow); }
 void test_verifier_global_subprogs(void)      { RUN(verifier_global_subprogs); }
 void test_verifier_global_ptr_args(void)      { RUN(verifier_global_ptr_args); }
 void test_verifier_gotol(void)                { RUN(verifier_gotol); }
+void test_verifier_gotox(void)                { RUN(verifier_gotox); }
 void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); }
 void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); }
 void test_verifier_helper_restricted(void)    { RUN(verifier_helper_restricted); }
@@ -280,6 +283,7 @@ void test_verifier_array_access(void)
 		      verifier_array_access__elf_bytes,
 		      init_array_access_maps);
 }
+void test_verifier_async_cb_context(void)    { RUN(verifier_async_cb_context); }
 
 static int init_value_ptr_arith_maps(struct bpf_object *obj)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c
index 99e438fe12ac..15c67d23128b 100644
--- a/tools/testing/selftests/bpf/prog_tests/wq.c
+++ b/tools/testing/selftests/bpf/prog_tests/wq.c
@@ -38,3 +38,59 @@ void serial_test_failures_wq(void)
 {
 	RUN_TESTS(wq_failures);
 }
+
+static void test_failure_map_no_btf(void)
+{
+	struct wq *skel = NULL;
+	char log[8192] = "";	/* stays a valid string even if nothing is logged */
+	const struct bpf_insn *insns;
+	size_t insn_cnt;
+	int ret, err, map_fd;
+	LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_size = sizeof(log), .log_buf = log,
+		    .log_level = 2);
+
+	skel = wq__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	err = bpf_object__prepare(skel->obj);
+	if (!ASSERT_OK(err, "skel__prepare"))
+		goto out;
+
+	map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "map_no_btf", sizeof(__u32), sizeof(__u64), 100,
+				NULL);
+	if (!ASSERT_GT(map_fd, -1, "map create"))
+		goto out;
+
+	err = bpf_map__reuse_fd(skel->maps.array, map_fd);
+	/* The map keeps its own duplicate on success; our fd is done either way. */
+	close(map_fd);
+	if (!ASSERT_OK(err, "map reuse fd"))
+		goto out;
+
+	insns = bpf_program__insns(skel->progs.test_map_no_btf);
+	if (!ASSERT_OK_PTR(insns, "insns ptr"))
+		goto out;
+
+	insn_cnt = bpf_program__insn_cnt(skel->progs.test_map_no_btf);
+	if (!ASSERT_GT(insn_cnt, 0u, "insn cnt"))
+		goto out;
+
+	ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+	if (!ASSERT_LT(ret, 0, "prog load failed")) {
+		/* Unexpected success: ret is a live prog fd, and 0 is a valid fd. */
+		close(ret);
+		goto out;
+	}
+
+	ASSERT_HAS_SUBSTR(log, "map 'map_no_btf' has to have BTF in order to use bpf_wq",
+			  "log complains no map BTF");
+out:
+	wq__destroy(skel);
+}
+
+void test_wq_custom(void)
+{
+	if (test__start_subtest("test_failure_map_no_btf"))
+		test_failure_map_no_btf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index 178292d1251a..ee94c281888a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -124,10 +124,10 @@ static int send_test_packet(int ifindex)
 	int n, sock = -1;
 	__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
 
-	/* The ethernet header is not relevant for this test and doesn't need to
-	 * be meaningful.
-	 */
-	struct ethhdr eth = { 0 };
+	/* We use the Ethernet header only to identify the test packet */
+	struct ethhdr eth = {
+		.h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
+	};
 
 	memcpy(packet, &eth, sizeof(eth));
 	memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
@@ -160,8 +160,16 @@ static int write_test_packet(int tap_fd)
 	__u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
 	int n;
 
-	/* The ethernet header doesn't need to be valid for this test */
-	memset(packet, 0, sizeof(struct ethhdr));
+	/* The Ethernet header is mostly not relevant. We use it to identify the
+	 * test packet and some BPF helpers we exercise expect to operate on
+	 * Ethernet frames carrying IP packets. Pretend that's the case.
+	 */
+	struct ethhdr eth = {
+		.h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
+		.h_proto = htons(ETH_P_IP),
+	};
+
+	memcpy(packet, &eth, sizeof(eth));
 	memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN);
 
 	n = write(tap_fd, packet, sizeof(packet));
@@ -171,31 +179,19 @@ static int write_test_packet(int tap_fd)
 	return 0;
 }
 
-static void assert_test_result(const struct bpf_map *result_map)
-{
-	int err;
-	__u32 map_key = 0;
-	__u8 map_value[TEST_PAYLOAD_LEN];
-
-	err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key),
-				   &map_value, TEST_PAYLOAD_LEN, BPF_ANY);
-	if (!ASSERT_OK(err, "lookup test_result"))
-		return;
-
-	ASSERT_MEMEQ(&map_value, &test_payload, TEST_PAYLOAD_LEN,
-		     "test_result map contains test payload");
-}
-
-static bool clear_test_result(struct bpf_map *result_map)
+static void dump_err_stream(const struct bpf_program *prog)
 {
-	const __u8 v[sizeof(test_payload)] = {};
-	const __u32 k = 0;
-	int err;
-
-	err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY);
-	ASSERT_OK(err, "update test_result");
+	char buf[512];
+	int ret;
 
-	return err == 0;
+	ret = 0;
+	do {
+		ret = bpf_prog_stream_read(bpf_program__fd(prog),
+					   BPF_STREAM_STDERR, buf, sizeof(buf),
+					   NULL);
+		if (ret > 0)
+			fwrite(buf, sizeof(buf[0]), ret, stderr);
+	} while (ret > 0);
 }
 
 void test_xdp_context_veth(void)
@@ -270,11 +266,14 @@ void test_xdp_context_veth(void)
 	if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx"))
 		goto close;
 
+	skel->bss->test_pass = false;
+
 	ret = send_test_packet(tx_ifindex);
 	if (!ASSERT_OK(ret, "send_test_packet"))
 		goto close;
 
-	assert_test_result(skel->maps.test_result);
+	if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass"))
+		dump_err_stream(tc_prog);
 
 close:
 	close_netns(nstoken);
@@ -286,7 +285,7 @@ close:
 static void test_tuntap(struct bpf_program *xdp_prog,
 			struct bpf_program *tc_prio_1_prog,
 			struct bpf_program *tc_prio_2_prog,
-			struct bpf_map *result_map)
+			bool *test_pass)
 {
 	LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
 	LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
@@ -295,8 +294,7 @@ static void test_tuntap(struct bpf_program *xdp_prog,
 	int tap_ifindex;
 	int ret;
 
-	if (!clear_test_result(result_map))
-		return;
+	*test_pass = false;
 
 	ns = netns_new(TAP_NETNS, true);
 	if (!ASSERT_OK_PTR(ns, "create and open ns"))
@@ -340,7 +338,8 @@ static void test_tuntap(struct bpf_program *xdp_prog,
 	if (!ASSERT_OK(ret, "write_test_packet"))
 		goto close;
 
-	assert_test_result(result_map);
+	if (!ASSERT_TRUE(*test_pass, "test_pass"))
+		dump_err_stream(tc_prio_2_prog ? : tc_prio_1_prog);
 
 close:
 	if (tap_fd >= 0)
@@ -411,7 +410,8 @@ static void test_tuntap_mirred(struct bpf_program *xdp_prog,
 	if (!ASSERT_OK(ret, "write_test_packet"))
 		goto close;
 
-	ASSERT_TRUE(*test_pass, "test_pass");
+	if (!ASSERT_TRUE(*test_pass, "test_pass"))
+		dump_err_stream(tc_prog);
 
 close:
 	if (tap_fd >= 0)
@@ -431,61 +431,82 @@ void test_xdp_context_tuntap(void)
 		test_tuntap(skel->progs.ing_xdp,
 			    skel->progs.ing_cls,
 			    NULL, /* tc prio 2 */
-			    skel->maps.test_result);
+			    &skel->bss->test_pass);
 	if (test__start_subtest("dynptr_read"))
 		test_tuntap(skel->progs.ing_xdp,
 			    skel->progs.ing_cls_dynptr_read,
 			    NULL, /* tc prio 2 */
-			    skel->maps.test_result);
+			    &skel->bss->test_pass);
 	if (test__start_subtest("dynptr_slice"))
 		test_tuntap(skel->progs.ing_xdp,
 			    skel->progs.ing_cls_dynptr_slice,
 			    NULL, /* tc prio 2 */
-			    skel->maps.test_result);
+			    &skel->bss->test_pass);
 	if (test__start_subtest("dynptr_write"))
 		test_tuntap(skel->progs.ing_xdp_zalloc_meta,
 			    skel->progs.ing_cls_dynptr_write,
 			    skel->progs.ing_cls_dynptr_read,
-			    skel->maps.test_result);
+			    &skel->bss->test_pass);
 	if (test__start_subtest("dynptr_slice_rdwr"))
 		test_tuntap(skel->progs.ing_xdp_zalloc_meta,
 			    skel->progs.ing_cls_dynptr_slice_rdwr,
 			    skel->progs.ing_cls_dynptr_slice,
-			    skel->maps.test_result);
+			    &skel->bss->test_pass);
 	if (test__start_subtest("dynptr_offset"))
 		test_tuntap(skel->progs.ing_xdp_zalloc_meta,
 			    skel->progs.ing_cls_dynptr_offset_wr,
 			    skel->progs.ing_cls_dynptr_offset_rd,
-			    skel->maps.test_result);
+			    &skel->bss->test_pass);
 	if (test__start_subtest("dynptr_offset_oob"))
 		test_tuntap(skel->progs.ing_xdp,
 			    skel->progs.ing_cls_dynptr_offset_oob,
 			    skel->progs.ing_cls,
-			    skel->maps.test_result);
-	if (test__start_subtest("clone_data_meta_empty_on_data_write"))
+			    &skel->bss->test_pass);
+	if (test__start_subtest("clone_data_meta_survives_data_write"))
 		test_tuntap_mirred(skel->progs.ing_xdp,
-				   skel->progs.clone_data_meta_empty_on_data_write,
+				   skel->progs.clone_data_meta_survives_data_write,
 				   &skel->bss->test_pass);
-	if (test__start_subtest("clone_data_meta_empty_on_meta_write"))
+	if (test__start_subtest("clone_data_meta_survives_meta_write"))
 		test_tuntap_mirred(skel->progs.ing_xdp,
-				   skel->progs.clone_data_meta_empty_on_meta_write,
+				   skel->progs.clone_data_meta_survives_meta_write,
 				   &skel->bss->test_pass);
-	if (test__start_subtest("clone_dynptr_empty_on_data_slice_write"))
+	if (test__start_subtest("clone_meta_dynptr_survives_data_slice_write"))
 		test_tuntap_mirred(skel->progs.ing_xdp,
-				   skel->progs.clone_dynptr_empty_on_data_slice_write,
+				   skel->progs.clone_meta_dynptr_survives_data_slice_write,
 				   &skel->bss->test_pass);
-	if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write"))
+	if (test__start_subtest("clone_meta_dynptr_survives_meta_slice_write"))
 		test_tuntap_mirred(skel->progs.ing_xdp,
-				   skel->progs.clone_dynptr_empty_on_meta_slice_write,
+				   skel->progs.clone_meta_dynptr_survives_meta_slice_write,
 				   &skel->bss->test_pass);
-	if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write"))
+	if (test__start_subtest("clone_meta_dynptr_rw_before_data_dynptr_write"))
 		test_tuntap_mirred(skel->progs.ing_xdp,
-				   skel->progs.clone_dynptr_rdonly_before_data_dynptr_write,
+				   skel->progs.clone_meta_dynptr_rw_before_data_dynptr_write,
 				   &skel->bss->test_pass);
-	if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write"))
+	if (test__start_subtest("clone_meta_dynptr_rw_before_meta_dynptr_write"))
 		test_tuntap_mirred(skel->progs.ing_xdp,
-				   skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write,
+				   skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write,
 				   &skel->bss->test_pass);
+	/* Tests for BPF helpers which touch headroom */
+	if (test__start_subtest("helper_skb_vlan_push_pop"))
+		test_tuntap(skel->progs.ing_xdp,
+			    skel->progs.helper_skb_vlan_push_pop,
+			    NULL, /* tc prio 2 */
+			    &skel->bss->test_pass);
+	if (test__start_subtest("helper_skb_adjust_room"))
+		test_tuntap(skel->progs.ing_xdp,
+			    skel->progs.helper_skb_adjust_room,
+			    NULL, /* tc prio 2 */
+			    &skel->bss->test_pass);
+	if (test__start_subtest("helper_skb_change_head_tail"))
+		test_tuntap(skel->progs.ing_xdp,
+			    skel->progs.helper_skb_change_head_tail,
+			    NULL, /* tc prio 2 */
+			    &skel->bss->test_pass);
+	if (test__start_subtest("helper_skb_change_proto"))
+		test_tuntap(skel->progs.ing_xdp,
+			    skel->progs.helper_skb_change_proto,
+			    NULL, /* tc prio 2 */
+			    &skel->bss->test_pass);
 
 	test_xdp_meta__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/xsk.c b/tools/testing/selftests/bpf/prog_tests/xsk.c
new file mode 100644
index 000000000000..dd4c35c0e428
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xsk.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <net/if.h>
+#include <stdarg.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_xsk.h"
+#include "xsk_xdp_progs.skel.h"
+
+#define VETH_RX "veth0"
+#define VETH_TX "veth1"
+#define MTU	1500
+
+int setup_veth(bool busy_poll)
+{
+	SYS(fail,
+	"ip link add %s numtxqueues 4 numrxqueues 4 type veth peer name %s numtxqueues 4 numrxqueues 4",
+	VETH_RX, VETH_TX);
+	SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_RX);
+	SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_TX);
+
+	if (busy_poll) {
+		SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_RX);
+		SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_RX);
+		SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_TX);
+		SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_TX);
+	}
+
+	SYS(fail, "ip link set %s mtu %d", VETH_RX, MTU);
+	SYS(fail, "ip link set %s mtu %d", VETH_TX, MTU);
+	SYS(fail, "ip link set %s up", VETH_RX);
+	SYS(fail, "ip link set %s up", VETH_TX);
+
+	return 0;
+
+fail:
+	return -1;
+}
+
+void delete_veth(void)
+{
+	SYS_NOFAIL("ip link del %s", VETH_RX);
+	SYS_NOFAIL("ip link del %s", VETH_TX);
+}
+
+int configure_ifobj(struct ifobject *tx, struct ifobject *rx)
+{
+	/* if_nametoindex() reports failure as 0, so only a positive index is valid */
+	rx->ifindex = if_nametoindex(VETH_RX);
+	if (!ASSERT_GT(rx->ifindex, 0, "get RX ifindex"))
+		return -1;
+
+	tx->ifindex = if_nametoindex(VETH_TX);
+	if (!ASSERT_GT(tx->ifindex, 0, "get TX ifindex"))
+		return -1;
+
+	tx->shared_umem = false;
+	rx->shared_umem = false;
+
+	return 0;
+}
+
+static void test_xsk(const struct test_spec *test_to_run, enum test_mode mode)
+{
+	struct ifobject *ifobj_tx, *ifobj_rx;
+	struct test_spec test;
+	int ret;
+
+	ifobj_tx = ifobject_create();
+	if (!ASSERT_OK_PTR(ifobj_tx, "create ifobj_tx"))
+		return;
+
+	ifobj_rx = ifobject_create();
+	if (!ASSERT_OK_PTR(ifobj_rx, "create ifobj_rx"))
+		goto delete_tx;
+
+	if (!ASSERT_OK(configure_ifobj(ifobj_tx, ifobj_rx), "configure ifobj"))
+		goto delete_rx;
+
+	ret = get_hw_ring_size(ifobj_tx->ifname, &ifobj_tx->ring);
+	if (!ret) {
+		ifobj_tx->hw_ring_size_supp = true;
+		ifobj_tx->set_ring.default_tx = ifobj_tx->ring.tx_pending;
+		ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending;
+	}
+
+	/* NOTE(review): failures from here on skip the stream/prog teardown below - confirm */
+	if (!ASSERT_OK(init_iface(ifobj_rx, worker_testapp_validate_rx), "init RX"))
+		goto delete_rx;
+	if (!ASSERT_OK(init_iface(ifobj_tx, worker_testapp_validate_tx), "init TX"))
+		goto delete_rx;
+
+	test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+
+	test.tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+	if (!ASSERT_OK_PTR(test.tx_pkt_stream_default, "TX pkt generation"))
+		goto delete_rx;
+	test.rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+	if (!ASSERT_OK_PTR(test.rx_pkt_stream_default, "RX pkt generation"))
+		goto delete_rx;
+
+	test_init(&test, ifobj_tx, ifobj_rx, mode, test_to_run);
+	ret = test.test_func(&test);
+	if (ret != TEST_SKIP)
+		ASSERT_OK(ret, "Run test");
+	pkt_stream_restore_default(&test);
+
+	if (ifobj_tx->hw_ring_size_supp)
+		hw_ring_size_reset(ifobj_tx);
+
+	pkt_stream_delete(test.tx_pkt_stream_default);
+	pkt_stream_delete(test.rx_pkt_stream_default);
+	xsk_xdp_progs__destroy(ifobj_tx->xdp_progs);
+	xsk_xdp_progs__destroy(ifobj_rx->xdp_progs);
+
+delete_rx:
+	ifobject_delete(ifobj_rx);
+delete_tx:
+	ifobject_delete(ifobj_tx);
+}
+
+void test_ns_xsk_skb(void)
+{
+	int i;
+
+	if (!ASSERT_OK(setup_veth(false), "setup veth"))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		if (test__start_subtest(tests[i].name))
+			test_xsk(&tests[i], TEST_MODE_SKB);
+	}
+
+	delete_veth();
+}
+
+void test_ns_xsk_drv(void)
+{
+	int i;
+
+	if (!ASSERT_OK(setup_veth(false), "setup veth"))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		if (test__start_subtest(tests[i].name))
+			test_xsk(&tests[i], TEST_MODE_DRV);
+	}
+
+	delete_veth();
+}
+
diff --git a/tools/testing/selftests/bpf/progs/arena_strsearch.c b/tools/testing/selftests/bpf/progs/arena_strsearch.c
new file mode 100644
index 000000000000..ef6b76658f7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_strsearch.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARENA);
+	__uint(map_flags, BPF_F_MMAPABLE);
+	__uint(max_entries, 100); /* number of pages */
+} arena SEC(".maps");
+
+#include "bpf_arena_strsearch.h"
+
+struct glob_test {
+	char const __arena *pat, *str;
+	bool expected;
+};
+
+static bool test(char const __arena *pat, char const __arena *str, bool expected)
+{
+	/* A case passes when glob_match() agrees with the expected outcome. */
+	bool matched = glob_match(pat, str);
+
+	/* bpf_printk("glob_match %s %s res %d ok %d", pat, str, matched, matched == expected); */
+	return matched == expected;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in one particular memory region (it is declared
+ * with the __arena qualifier).  The obvious "array of structures
+ * containing char *" cannot force where the pointed-to strings live.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static const char __arena glob_tests[] =
+	/* Some basic tests */
+	"1" "a\0" "a\0"
+	"0" "a\0" "b\0"
+	"0" "a\0" "aa\0"
+	"0" "a\0" "\0"
+	"1" "\0" "\0"
+	"0" "\0" "a\0"
+	/* Simple character class tests */
+	"1" "[a]\0" "a\0"
+	"0" "[a]\0" "b\0"
+	"0" "[!a]\0" "a\0"
+	"1" "[!a]\0" "b\0"
+	"1" "[ab]\0" "a\0"
+	"1" "[ab]\0" "b\0"
+	"0" "[ab]\0" "c\0"
+	"1" "[!ab]\0" "c\0"
+	"1" "[a-c]\0" "b\0"
+	"0" "[a-c]\0" "d\0"
+	/* Corner cases in character class parsing */
+	"1" "[a-c-e-g]\0" "-\0"
+	"0" "[a-c-e-g]\0" "d\0"
+	"1" "[a-c-e-g]\0" "f\0"
+	"1" "[]a-ceg-ik[]\0" "a\0"
+	"1" "[]a-ceg-ik[]\0" "]\0"
+	"1" "[]a-ceg-ik[]\0" "[\0"
+	"1" "[]a-ceg-ik[]\0" "h\0"
+	"0" "[]a-ceg-ik[]\0" "f\0"
+	"0" "[!]a-ceg-ik[]\0" "h\0"
+	"0" "[!]a-ceg-ik[]\0" "]\0"
+	"1" "[!]a-ceg-ik[]\0" "f\0"
+	/* Simple wild cards */
+	"1" "?\0" "a\0"
+	"0" "?\0" "aa\0"
+	"0" "??\0" "a\0"
+	"1" "?x?\0" "axb\0"
+	"0" "?x?\0" "abx\0"
+	"0" "?x?\0" "xab\0"
+	/* Asterisk wild cards (backtracking) */
+	"0" "*??\0" "a\0"
+	"1" "*??\0" "ab\0"
+	"1" "*??\0" "abc\0"
+	"1" "*??\0" "abcd\0"
+	"0" "??*\0" "a\0"
+	"1" "??*\0" "ab\0"
+	"1" "??*\0" "abc\0"
+	"1" "??*\0" "abcd\0"
+	"0" "?*?\0" "a\0"
+	"1" "?*?\0" "ab\0"
+	"1" "?*?\0" "abc\0"
+	"1" "?*?\0" "abcd\0"
+	"1" "*b\0" "b\0"
+	"1" "*b\0" "ab\0"
+	"0" "*b\0" "ba\0"
+	"1" "*b\0" "bb\0"
+	"1" "*b\0" "abb\0"
+	"1" "*b\0" "bab\0"
+	"1" "*bc\0" "abbc\0"
+	"1" "*bc\0" "bc\0"
+	"1" "*bc\0" "bbc\0"
+	"1" "*bc\0" "bcbc\0"
+	/* Multiple asterisks (complex backtracking) */
+	"1" "*ac*\0" "abacadaeafag\0"
+	"1" "*ac*ae*ag*\0" "abacadaeafag\0"
+	"1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+	"0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+	"1" "*abcd*\0" "abcabcabcabcdefg\0"
+	"1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+	"1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+	"0" "*abcd*\0" "abcabcabcabcefg\0"
+	"0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+bool skip = false;
+
+SEC("syscall")
+int arena_strsearch(void *ctx)
+{
+	unsigned successes = 0;
+	unsigned n = 0;
+	char const __arena *p = glob_tests;
+
+	/*
+	 * Tests are jammed together in a string.  The first byte is '1'
+	 * or '0' to indicate the expected outcome, or '\0' to indicate the
+	 * end of the tests.  Then come two null-terminated strings: the
+	 * pattern and the string to match it against.
+	 */
+	while (*p) {
+		bool expected = *p++ & 1;
+		char const __arena *pat = p;
+
+		cond_break;
+		p += bpf_arena_strlen(p) + 1;
+		successes += test(pat, p, expected);
+		p += bpf_arena_strlen(p) + 1;
+		n++;
+	}
+
+	n -= successes;
+	/* bpf_printk("glob: %u self-tests passed, %u failed\n", successes, n); */
+
+	return n ? -1 : 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
index 4e51785e7606..9af19dfe4e80 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
@@ -22,10 +22,6 @@
 #define TCP_PACING_CA_RATIO (120)
 #define TCP_REORDERING (12)
 
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#define after(seq2, seq1) before(seq1, seq2)
-
 extern void cubictcp_init(struct sock *sk) __ksym;
 extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
 extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym;
@@ -34,11 +30,6 @@ extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
 extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
 extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
 
-static bool before(__u32 seq1, __u32 seq2)
-{
-	return (__s32)(seq1-seq2) < 0;
-}
-
 static __u64 div64_u64(__u64 dividend, __u64 divisor)
 {
 	return dividend / divisor;
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index f089faa97ae6..46fb2b37d3a7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -20,13 +20,6 @@
 char _license[] SEC("license") = "GPL";
 
 #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
-static bool before(__u32 seq1, __u32 seq2)
-{
-	return (__s32)(seq1-seq2) < 0;
-}
-#define after(seq2, seq1) 	before(seq1, seq2)
 
 extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
 extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 32c511bcd60b..1cc83140849f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -13,16 +13,10 @@
 #ifndef EBUSY
 #define EBUSY 16
 #endif
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
 #define min_not_zero(x, y) ({			\
 	typeof(x) __x = (x);			\
 	typeof(y) __y = (y);			\
 	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-static bool before(__u32 seq1, __u32 seq2)
-{
-	return (__s32)(seq1-seq2) < 0;
-}
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_gotox.c b/tools/testing/selftests/bpf/progs/bpf_gotox.c
new file mode 100644
index 000000000000..216c71b94c64
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_gotox.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+__u64 in_user;
+__u64 ret_user;
+
+int pid;
+
+/*
+ * Skip all the tests if the compiler doesn't support indirect jumps.
+ *
+ * If tests are skipped, then all functions below are compiled as
+ * dummies, so that the skeleton looks the same and the userspace
+ * program needs no checks other than whether data->skip is set.
+ */
+#ifdef __BPF_FEATURE_GOTOX
+__u64 skip SEC(".data") = 0;
+#else
+__u64 skip = 1;
+#endif
+
+struct simple_ctx {
+	__u64 x;
+};
+
+#ifdef __BPF_FEATURE_GOTOX
+__u64 some_var;
+
+/*
+ * This function adds code which will be replaced by a different
+ * number of instructions by the verifier. This adds additional
+ * stress on testing the insn_array maps corresponding to indirect jumps.
+ */
+static __always_inline void adjust_insns(__u64 x)
+{
+	some_var ^= x + bpf_jiffies64();
+}
+
+SEC("syscall")
+int one_switch(struct simple_ctx *ctx)
+{
+	switch (ctx->x) {
+	case 0:
+		adjust_insns(ctx->x + 1);
+		ret_user = 2;
+		break;
+	case 1:
+		adjust_insns(ctx->x + 7);
+		ret_user = 3;
+		break;
+	case 2:
+		adjust_insns(ctx->x + 9);
+		ret_user = 4;
+		break;
+	case 3:
+		adjust_insns(ctx->x + 11);
+		ret_user = 5;
+		break;
+	case 4:
+		adjust_insns(ctx->x + 17);
+		ret_user = 7;
+		break;
+	default:
+		adjust_insns(ctx->x + 177);
+		ret_user = 19;
+		break;
+	}
+
+	return 0;
+}
+
+SEC("syscall")
+int one_switch_non_zero_sec_off(struct simple_ctx *ctx)
+{
+	switch (ctx->x) {
+	case 0:
+		adjust_insns(ctx->x + 1);
+		ret_user = 2;
+		break;
+	case 1:
+		adjust_insns(ctx->x + 7);
+		ret_user = 3;
+		break;
+	case 2:
+		adjust_insns(ctx->x + 9);
+		ret_user = 4;
+		break;
+	case 3:
+		adjust_insns(ctx->x + 11);
+		ret_user = 5;
+		break;
+	case 4:
+		adjust_insns(ctx->x + 17);
+		ret_user = 7;
+		break;
+	default:
+		adjust_insns(ctx->x + 177);
+		ret_user = 19;
+		break;
+	}
+
+	return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int simple_test_other_sec(struct pt_regs *ctx)
+{
+	__u64 x = in_user;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	switch (x) {
+	case 0:
+		adjust_insns(x + 1);
+		ret_user = 2;
+		break;
+	case 1:
+		adjust_insns(x + 7);
+		ret_user = 3;
+		break;
+	case 2:
+		adjust_insns(x + 9);
+		ret_user = 4;
+		break;
+	case 3:
+		adjust_insns(x + 11);
+		ret_user = 5;
+		break;
+	case 4:
+		adjust_insns(x + 17);
+		ret_user = 7;
+		break;
+	default:
+		adjust_insns(x + 177);
+		ret_user = 19;
+		break;
+	}
+
+	return 0;
+}
+
+SEC("syscall")
+int two_switches(struct simple_ctx *ctx)
+{
+	switch (ctx->x) {
+	case 0:
+		adjust_insns(ctx->x + 1);
+		ret_user = 2;
+		break;
+	case 1:
+		adjust_insns(ctx->x + 7);
+		ret_user = 3;
+		break;
+	case 2:
+		adjust_insns(ctx->x + 9);
+		ret_user = 4;
+		break;
+	case 3:
+		adjust_insns(ctx->x + 11);
+		ret_user = 5;
+		break;
+	case 4:
+		adjust_insns(ctx->x + 17);
+		ret_user = 7;
+		break;
+	default:
+		adjust_insns(ctx->x + 177);
+		ret_user = 19;
+		break;
+	}
+
+	switch (ctx->x + !!ret_user) {
+	case 1:
+		adjust_insns(ctx->x + 7);
+		ret_user = 103;
+		break;
+	case 2:
+		adjust_insns(ctx->x + 9);
+		ret_user = 104;
+		break;
+	case 3:
+		adjust_insns(ctx->x + 11);
+		ret_user = 107;
+		break;
+	case 4:
+		adjust_insns(ctx->x + 11);
+		ret_user = 205;
+		break;
+	case 5:
+		adjust_insns(ctx->x + 11);
+		ret_user = 115;
+		break;
+	default:
+		adjust_insns(ctx->x + 177);
+		ret_user = 1019;
+		break;
+	}
+
+	return 0;
+}
+
+SEC("syscall")
+int big_jump_table(struct simple_ctx *ctx __attribute__((unused)))
+{
+	const void *const jt[256] = {
+		[0 ... 255] = &&default_label,
+		[0] = &&l0,
+		[11] = &&l11,
+		[27] = &&l27,
+		[31] = &&l31,
+	};
+
+	goto *jt[ctx->x & 0xff];
+
+l0:
+	adjust_insns(ctx->x + 1);
+	ret_user = 2;
+	return 0;
+
+l11:
+	adjust_insns(ctx->x + 7);
+	ret_user = 3;
+	return 0;
+
+l27:
+	adjust_insns(ctx->x + 9);
+	ret_user = 4;
+	return 0;
+
+l31:
+	adjust_insns(ctx->x + 11);
+	ret_user = 5;
+	return 0;
+
+default_label:
+	adjust_insns(ctx->x + 177);
+	ret_user = 19;
+	return 0;
+}
+
+SEC("syscall")
+int one_jump_two_maps(struct simple_ctx *ctx __attribute__((unused)))
+{
+	__label__ l1, l2, l3, l4;
+	void *jt1[2] = { &&l1, &&l2 };
+	void *jt2[2] = { &&l3, &&l4 };
+	unsigned int a = ctx->x % 2;
+	unsigned int b = (ctx->x / 2) % 2;
+	volatile int ret = 0;
+
+	if (!(a < 2 && b < 2))
+		return 19;
+
+	if (ctx->x % 2)
+		goto *jt1[a];
+	else
+		goto *jt2[b];
+
+	l1: ret += 1;
+	l2: ret += 3;
+	l3: ret += 5;
+	l4: ret += 7;
+
+	ret_user = ret;
+	return ret;
+}
+
+SEC("syscall")
+int one_map_two_jumps(struct simple_ctx *ctx __attribute__((unused)))
+{
+	__label__ l1, l2, l3;
+	void *jt[3] = { &&l1, &&l2, &&l3 };
+	unsigned int a = (ctx->x >> 2) & 1;
+	unsigned int b = (ctx->x >> 3) & 1;
+	volatile int ret = 0;
+
+	if (ctx->x % 2)
+		goto *jt[a];
+
+	if (ctx->x % 3)
+		goto *jt[a + b];
+
+	l1: ret += 3;
+	l2: ret += 5;
+	l3: ret += 7;
+
+	ret_user = ret;
+	return ret;
+}
+
+/* Just to introduce some non-zero offsets in .text */
+static __noinline int f0(volatile struct simple_ctx *ctx __arg_ctx)
+{
+	if (ctx)
+		return 1;
+	else
+		return 13;
+}
+
+SEC("syscall") int f1(struct simple_ctx *ctx)
+{
+	ret_user = 0;
+	return f0(ctx);
+}
+
+static __noinline int __static_global(__u64 x)
+{
+	switch (x) {
+	case 0:
+		adjust_insns(x + 1);
+		ret_user = 2;
+		break;
+	case 1:
+		adjust_insns(x + 7);
+		ret_user = 3;
+		break;
+	case 2:
+		adjust_insns(x + 9);
+		ret_user = 4;
+		break;
+	case 3:
+		adjust_insns(x + 11);
+		ret_user = 5;
+		break;
+	case 4:
+		adjust_insns(x + 17);
+		ret_user = 7;
+		break;
+	default:
+		adjust_insns(x + 177);
+		ret_user = 19;
+		break;
+	}
+
+	return 0;
+}
+
+SEC("syscall")
+int use_static_global1(struct simple_ctx *ctx)
+{
+	ret_user = 0;
+	return __static_global(ctx->x);
+}
+
+SEC("syscall")
+int use_static_global2(struct simple_ctx *ctx)
+{
+	ret_user = 0;
+	adjust_insns(ctx->x + 1);
+	return __static_global(ctx->x);
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int use_static_global_other_sec(void *ctx)
+{
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	return __static_global(in_user);
+}
+
+__noinline int __nonstatic_global(__u64 x)
+{
+	switch (x) {
+	case 0:
+		adjust_insns(x + 1);
+		ret_user = 2;
+		break;
+	case 1:
+		adjust_insns(x + 7);
+		ret_user = 3;
+		break;
+	case 2:
+		adjust_insns(x + 9);
+		ret_user = 4;
+		break;
+	case 3:
+		adjust_insns(x + 11);
+		ret_user = 5;
+		break;
+	case 4:
+		adjust_insns(x + 17);
+		ret_user = 7;
+		break;
+	default:
+		adjust_insns(x + 177);
+		ret_user = 19;
+		break;
+	}
+
+	return 0;
+}
+
+SEC("syscall")
+int use_nonstatic_global1(struct simple_ctx *ctx)
+{
+	ret_user = 0;
+	return __nonstatic_global(ctx->x);
+}
+
+SEC("syscall")
+int use_nonstatic_global2(struct simple_ctx *ctx)
+{
+	ret_user = 0;
+	adjust_insns(ctx->x + 1);
+	return __nonstatic_global(ctx->x);
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int use_nonstatic_global_other_sec(void *ctx)
+{
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	return __nonstatic_global(in_user);
+}
+
+#else /* __BPF_FEATURE_GOTOX */
+
+#define SKIP_TEST(TEST_NAME)				\
+	SEC("syscall") int TEST_NAME(void *ctx)		\
+	{						\
+		return 0;				\
+	}
+
+SKIP_TEST(one_switch);
+SKIP_TEST(one_switch_non_zero_sec_off);
+SKIP_TEST(simple_test_other_sec);
+SKIP_TEST(two_switches);
+SKIP_TEST(big_jump_table);
+SKIP_TEST(one_jump_two_maps);
+SKIP_TEST(one_map_two_jumps);
+SKIP_TEST(use_static_global1);
+SKIP_TEST(use_static_global2);
+SKIP_TEST(use_static_global_other_sec);
+SKIP_TEST(use_nonstatic_global1);
+SKIP_TEST(use_nonstatic_global2);
+SKIP_TEST(use_nonstatic_global_other_sec);
+
+#endif /* __BPF_FEATURE_GOTOX */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
index 774d4dbe8189..a8aa5a71d846 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
@@ -18,23 +18,10 @@
 
 unsigned short reuse_listen_hport = 0;
 unsigned short listen_hport = 0;
-char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
+const char cubic_cc[] = "bpf_cubic";
 char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
 bool random_retry = false;
 
-static bool tcp_cc_eq(const char *a, const char *b)
-{
-	int i;
-
-	for (i = 0; i < TCP_CA_NAME_MAX; i++) {
-		if (a[i] != b[i])
-			return false;
-		if (!a[i])
-			break;
-	}
-
-	return true;
-}
 
 SEC("iter/tcp")
 int change_tcp_cc(struct bpf_iter__tcp *ctx)
@@ -58,7 +45,7 @@ int change_tcp_cc(struct bpf_iter__tcp *ctx)
 			   cur_cc, sizeof(cur_cc)))
 		return 0;
 
-	if (!tcp_cc_eq(cur_cc, cubic_cc))
+	if (bpf_strncmp(cur_cc, TCP_CA_NAME_MAX, cubic_cc))
 		return 0;
 
 	if (random_retry && bpf_get_prandom_u32() % 4 == 1)
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 164640db3a29..b1e509b231cd 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -99,13 +99,13 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		timer_active = 1;
-		timer_expires = icsk->icsk_retransmit_timer.expires;
+		timer_expires = sp->tcp_retransmit_timer.expires;
 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
 		timer_active = 4;
-		timer_expires = icsk->icsk_retransmit_timer.expires;
-	} else if (timer_pending(&sp->sk_timer)) {
+		timer_expires = sp->tcp_retransmit_timer.expires;
+	} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
 		timer_active = 2;
-		timer_expires = sp->sk_timer.expires;
+		timer_expires = icsk->icsk_keepalive_timer.expires;
 	} else {
 		timer_active = 0;
 		timer_expires = bpf_jiffies64();
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index 591c703f5032..dbc7166aee91 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -99,13 +99,13 @@ static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		timer_active = 1;
-		timer_expires = icsk->icsk_retransmit_timer.expires;
+		timer_expires = sp->tcp_retransmit_timer.expires;
 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
 		timer_active = 4;
-		timer_expires = icsk->icsk_retransmit_timer.expires;
-	} else if (timer_pending(&sp->sk_timer)) {
+		timer_expires = sp->tcp_retransmit_timer.expires;
+	} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
 		timer_active = 2;
-		timer_expires = sp->sk_timer.expires;
+		timer_expires = icsk->icsk_keepalive_timer.expires;
 	} else {
 		timer_active = 0;
 		timer_expires = bpf_jiffies64();
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index a7a1a684eed1..c9bfbe1bafc1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -126,6 +126,9 @@
  *                   Several __arch_* annotations could be specified at once.
  *                   When test case is not run on current arch it is marked as skipped.
  * __caps_unpriv     Specify the capabilities that should be set when running the test.
+ *
+ * __linear_size     Specify the size of the linear area of non-linear skbs, or
+ *                   0 for linear skbs.
  */
 #define __msg(msg)		__attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
 #define __not_msg(msg)		__attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg)))
@@ -159,6 +162,7 @@
 #define __stderr_unpriv(msg)	__attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg)))
 #define __stdout(msg)		__attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg)))
 #define __stdout_unpriv(msg)	__attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __linear_size(sz)	__attribute__((btf_decl_tag("comment:test_linear_size=" XSTR(sz))))
 
 /* Define common capabilities tested using __caps_unpriv */
 #define CAP_NET_ADMIN		12
diff --git a/tools/testing/selftests/bpf/progs/bpf_smc.c b/tools/testing/selftests/bpf/progs/bpf_smc.c
new file mode 100644
index 000000000000..70d8b08f5914
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_smc.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+enum {
+	BPF_SMC_LISTEN	= 10,
+};
+
+struct smc_sock___local {
+	struct sock sk;
+	struct smc_sock *listen_smc;
+	bool use_fallback;
+} __attribute__((preserve_access_index));
+
+int smc_cnt = 0;
+int fallback_cnt = 0;
+
+SEC("fentry/smc_release")
+int BPF_PROG(bpf_smc_release, struct socket *sock)
+{
+	/* only count from one side (client) */
+	if (sock->sk->__sk_common.skc_state == BPF_SMC_LISTEN)
+		return 0;
+	smc_cnt++;
+	return 0;
+}
+
+SEC("fentry/smc_switch_to_fallback")
+int BPF_PROG(bpf_smc_switch_to_fallback, struct smc_sock___local *smc)
+{
+	/* only count from one side (client) */
+	if (smc && !smc->listen_smc)
+		fallback_cnt++;
+	return 0;
+}
+
+/* Fall back to the default value if no strategy was found */
+bool default_ip_strat_value = true;
+
+struct smc_policy_ip_key {
+	__u32	sip;
+	__u32	dip;
+};
+
+struct smc_policy_ip_value {
+	__u8	mode;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(struct smc_policy_ip_key));
+	__uint(value_size, sizeof(struct smc_policy_ip_value));
+	__uint(max_entries, 128);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+} smc_policy_ip SEC(".maps");
+
+static bool smc_check(__u32 src, __u32 dst)
+{
+	/* Look up the (src, dst) pair; without an entry the global default applies. */
+	struct smc_policy_ip_key key = { .sip = src, .dip = dst };
+	struct smc_policy_ip_value *value;
+
+	value = bpf_map_lookup_elem(&smc_policy_ip, &key);
+	if (!value)
+		return default_ip_strat_value;
+	return value->mode;
+}
+
+SEC("fmod_ret/update_socket_protocol")
+int BPF_PROG(smc_run, int family, int type, int protocol)
+{
+	struct task_struct *task;
+
+	if (family != AF_INET && family != AF_INET6)
+		return protocol;
+
+	if ((type & 0xf) != SOCK_STREAM)
+		return protocol;
+
+	if (protocol != 0 && protocol != IPPROTO_TCP)
+		return protocol;
+
+	task = bpf_get_current_task_btf();
+	/* Prevent from affecting other tests */
+	if (!task || !task->nsproxy->net_ns->smc.hs_ctrl)
+		return protocol;
+
+	return IPPROTO_SMC;
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_smc_set_tcp_option_cond, const struct tcp_sock *tp,
+	     struct inet_request_sock *ireq)
+{
+	return smc_check(ireq->req.__req_common.skc_daddr,
+			 ireq->req.__req_common.skc_rcv_saddr);
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_smc_set_tcp_option, struct tcp_sock *tp)
+{
+	return smc_check(tp->inet_conn.icsk_inet.sk.__sk_common.skc_rcv_saddr,
+			 tp->inet_conn.icsk_inet.sk.__sk_common.skc_daddr);
+}
+
+SEC(".struct_ops")
+struct smc_hs_ctrl  linkcheck = {
+	.name		= "linkcheck",
+	.syn_option	= (void *)bpf_smc_set_tcp_option,
+	.synack_option	= (void *)bpf_smc_set_tcp_option_cond,
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 17db400f0e0d..d8dacef37c16 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -146,6 +146,20 @@
 
 #define tcp_jiffies32 ((__u32)bpf_jiffies64())
 
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+static inline bool before(__u32 seq1, __u32 seq2)
+{
+	return (__s32)(seq1 - seq2) < 0;
+}
+
+#define after(seq2, seq1) before(seq1, seq2)
+
 static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
 {
 	return (struct inet_connection_sock *)sk;
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 9e9ebf27b878..9d158cfad981 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -34,6 +34,9 @@
 #define SOL_TCP 6
 #endif
 
+const char reno[] = "reno";
+const char cubic[] = "cubic";
+
 __attribute__ ((noinline)) __weak
 int do_bind(struct bpf_sock_addr *ctx)
 {
@@ -50,35 +53,27 @@ int do_bind(struct bpf_sock_addr *ctx)
 }
 
 static __inline int verify_cc(struct bpf_sock_addr *ctx,
-			      char expected[TCP_CA_NAME_MAX])
+			      const char expected[])
 {
 	char buf[TCP_CA_NAME_MAX];
-	int i;
 
 	if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
 		return 1;
 
-	for (i = 0; i < TCP_CA_NAME_MAX; i++) {
-		if (buf[i] != expected[i])
-			return 1;
-		if (buf[i] == 0)
-			break;
-	}
+	if (bpf_strncmp(buf, TCP_CA_NAME_MAX, expected))
+		return 1;
 
 	return 0;
 }
 
 static __inline int set_cc(struct bpf_sock_addr *ctx)
 {
-	char reno[TCP_CA_NAME_MAX] = "reno";
-	char cubic[TCP_CA_NAME_MAX] = "cubic";
-
-	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
+	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)reno, sizeof(reno)))
 		return 1;
 	if (verify_cc(ctx, reno))
 		return 1;
 
-	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
+	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)cubic, sizeof(cubic)))
 		return 1;
 	if (verify_cc(ctx, cubic))
 		return 1;
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 127dea342e5a..e0d672d93adf 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -914,8 +914,8 @@ void *user_ptr;
 char expected_str[384];
 __u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257};
 
-typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off,
-				    u32 size, const void *unsafe_ptr);
+typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u64 off,
+				    u64 size, const void *unsafe_ptr);
 
 /* Returns the offset just before the end of the maximum sized xdp fragment.
  * Any write larger than 32 bytes will be split between 2 fragments.
@@ -1106,16 +1106,16 @@ int test_copy_from_user_str_dynptr(void *ctx)
 	return 0;
 }
 
-static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off,
-					u32 size, const void *unsafe_ptr)
+static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off,
+					u64 size, const void *unsafe_ptr)
 {
 	struct task_struct *task = bpf_get_current_task_btf();
 
 	return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task);
 }
 
-static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off,
-					    u32 size, const void *unsafe_ptr)
+static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u64 off,
+					    u64 size, const void *unsafe_ptr)
 {
 	struct task_struct *task = bpf_get_current_task_btf();
 
diff --git a/tools/testing/selftests/bpf/progs/file_reader.c b/tools/testing/selftests/bpf/progs/file_reader.c
new file mode 100644
index 000000000000..4d756b623557
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/file_reader.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct elem);
+} arrmap SEC(".maps");
+
+struct elem {
+	struct file *file;
+	struct bpf_task_work tw;
+};
+
+char user_buf[256000];
+char tmp_buf[256000];
+
+int pid = 0;
+int err, run_success = 0;
+
+static int validate_file_read(struct file *file);
+static int task_work_callback(struct bpf_map *map, void *key, void *value);
+
+SEC("lsm/file_open")
+int on_open_expect_fault(void *c)
+{
+	struct bpf_dynptr dynptr;
+	struct file *file;
+	int local_err = 1;
+	__u32 user_buf_sz = sizeof(user_buf);
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	file = bpf_get_task_exe_file(bpf_get_current_task_btf());
+	if (!file)
+		return 0;
+
+	if (bpf_dynptr_from_file(file, 0, &dynptr))
+		goto out;
+
+	local_err = bpf_dynptr_read(tmp_buf, user_buf_sz, &dynptr, user_buf_sz, 0);
+	if (local_err == -EFAULT) { /* Expect page fault */
+		local_err = 0;
+		run_success = 1;
+	}
+out:
+	bpf_dynptr_file_discard(&dynptr);
+	if (local_err)
+		err = local_err;
+	bpf_put_file(file);
+	return 0;
+}
+
+SEC("lsm/file_open")
+int on_open_validate_file_read(void *c)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+	struct elem *work;
+	int key = 0;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	work = bpf_map_lookup_elem(&arrmap, &key);
+	if (!work) {
+		err = 1;
+		return 0;
+	}
+	bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, task_work_callback, NULL);
+	return 0;
+}
+
+/* Called in a sleepable context, read 256K bytes, cross check with user space read data */
+static int task_work_callback(struct bpf_map *map, void *key, void *value)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+	struct file *file = bpf_get_task_exe_file(task);
+
+	if (!file)
+		return 0;
+
+	err = validate_file_read(file);
+	if (!err)
+		run_success = 1;
+	bpf_put_file(file);
+	return 0;
+}
+
+static int verify_dynptr_read(struct bpf_dynptr *ptr, u32 off, char *user_buf, u32 len)
+{
+	int idx;
+
+	/* Read len bytes starting at offset off of the dynptr into tmp_buf; any error fails */
+	if (bpf_dynptr_read(tmp_buf, len, ptr, off, 0) != 0)
+		return 1;
+
+	/* Contents read from BPF must equal the bytes user space supplied in user_buf */
+	bpf_for(idx, 0, len) {
+		if (tmp_buf[idx] != user_buf[idx])
+			return 1;
+	}
+	return 0;
+}
+
+static int validate_file_read(struct file *file)
+{
+	struct bpf_dynptr dynptr;
+	int loc_err = 1, off;
+	__u32 user_buf_sz = sizeof(user_buf);
+
+	if (bpf_dynptr_from_file(file, 0, &dynptr))
+		goto cleanup;
+
+	loc_err = verify_dynptr_read(&dynptr, 0, user_buf, user_buf_sz);
+	off = 1;
+	loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off);
+	off = user_buf_sz - 1;
+	loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off);
+	/* Read file with random offset and length */
+	off = 4097;
+	loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, 100);
+
+	/* Adjust dynptr, verify read */
+	loc_err = loc_err ?: bpf_dynptr_adjust(&dynptr, off, off + 1);
+	loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 1);
+	/* Can't read more than 1 byte */
+	loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 2) == 0;
+	/* Can't read with far offset */
+	loc_err = loc_err ?: verify_dynptr_read(&dynptr, 1, user_buf + off, 1) == 0;
+
+cleanup:
+	bpf_dynptr_file_discard(&dynptr);
+	return loc_err;
+}
diff --git a/tools/testing/selftests/bpf/progs/file_reader_fail.c b/tools/testing/selftests/bpf/progs/file_reader_fail.c
new file mode 100644
index 000000000000..32fe28ed2439
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/file_reader_fail.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err;
+void *user_ptr;
+
+SEC("lsm/file_open")
+__failure
+__msg("Unreleased reference id=")
+int on_nanosleep_unreleased_ref(void *ctx)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+	struct file *file = bpf_get_task_exe_file(task);
+	struct bpf_dynptr dynptr;
+
+	if (!file)
+		return 0;
+
+	err = bpf_dynptr_from_file(file, 0, &dynptr);
+	return err ? 1 : 0;
+}
+
+SEC("xdp")
+__failure
+__msg("Expected a dynptr of type file as arg #0")
+int xdp_wrong_dynptr_type(struct xdp_md *xdp)
+{
+	struct bpf_dynptr dynptr;
+
+	bpf_dynptr_from_xdp(xdp, 0, &dynptr);
+	bpf_dynptr_file_discard(&dynptr);
+	return 0;
+}
+
+SEC("xdp")
+__failure
+__msg("Expected an initialized dynptr as arg #0")
+int xdp_no_dynptr_type(struct xdp_md *xdp)
+{
+	struct bpf_dynptr dynptr;
+
+	bpf_dynptr_file_discard(&dynptr);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/htab_update.c b/tools/testing/selftests/bpf/progs/htab_update.c
index 7481bb30b29b..195d3b2fba00 100644
--- a/tools/testing/selftests/bpf/progs/htab_update.c
+++ b/tools/testing/selftests/bpf/progs/htab_update.c
@@ -6,24 +6,31 @@
 
 char _license[] SEC("license") = "GPL";
 
+/* Map value type: has BTF-managed field (bpf_timer) */
+struct val {
+	struct bpf_timer t;
+	__u64 payload;
+};
+
 struct {
 	__uint(type, BPF_MAP_TYPE_HASH);
 	__uint(max_entries, 1);
-	__uint(key_size, sizeof(__u32));
-	__uint(value_size, sizeof(__u32));
+	__type(key, __u32);
+	__type(value, struct val);
 } htab SEC(".maps");
 
 int pid = 0;
 int update_err = 0;
 
-SEC("?fentry/lookup_elem_raw")
-int lookup_elem_raw(void *ctx)
+SEC("?fentry/bpf_obj_free_fields")
+int bpf_obj_free_fields(void *ctx)
 {
-	__u32 key = 0, value = 1;
+	__u32 key = 0;
+	struct val value = { .payload = 1 };
 
 	if ((bpf_get_current_pid_tgid() >> 32) != pid)
 		return 0;
 
-	update_err = bpf_map_update_elem(&htab, &key, &value, 0);
+	update_err = bpf_map_update_elem(&htab, &key, &value, BPF_ANY);
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
index 645b2c9f7867..0e87ad1ebcfa 100644
--- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c
+++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
@@ -12,11 +12,6 @@
 #define IP_OFFSET		0x1FFF
 #define NEXTHDR_FRAGMENT	44
 
-extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
-			      struct bpf_dynptr *ptr__uninit) __ksym;
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
-			      void *buffer, uint32_t buffer__sz) __ksym;
-
 volatile int shootdowns = 0;
 
 static bool is_frag_v4(struct iphdr *iph)
diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c
index 05fa5ce7fc59..d00fd570255a 100644
--- a/tools/testing/selftests/bpf/progs/iters_looping.c
+++ b/tools/testing/selftests/bpf/progs/iters_looping.c
@@ -161,3 +161,56 @@ int simplest_loop(void *ctx)
 
 	return 0;
 }
+
+__used
+static void iterator_with_diff_stack_depth(int x)
+{
+	struct bpf_iter_num iter;
+
+	asm volatile (
+		"if r1 == 42 goto 0f;" /* r1 == 42: skip the store below */
+		"*(u64 *)(r10 - 128) = 0;" /* otherwise write at r10 - 128 */
+	"0:"
+		/* create iterator */
+		"r1 = %[iter];"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+	"1:"
+		/* consume next item */
+		"r1 = %[iter];"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto 2f;"
+		"goto 1b;"
+	"2:"
+		/* destroy iterator */
+		"r1 = %[iter];"
+		"call %[bpf_iter_num_destroy];"
+		:
+		: __imm_ptr(iter), ITER_HELPERS
+		: __clobber_common, "r6"
+	);
+}
+
+SEC("socket")
+__success
+__naked int widening_stack_size_bug(void *ctx)
+{
+	/*
+	 * Depending on iterator_with_diff_stack_depth() parameter value,
+	 * subprogram stack depth is either 8 or 128 bytes. Arrange values so
+	 * that it is 128 on the first call and 8 on the second. This triggered
+	 * a bug in verifier's widen_imprecise_scalars() logic.
+	 */
+	asm volatile (
+		"r6 = 0;" /* r6 counts completed calls */
+		"r1 = 0;" /* argument for the first call */
+	"1:"
+		"call iterator_with_diff_stack_depth;"
+		"r1 = 42;" /* argument for the second call */
+		"r6 += 1;"
+		"if r6 < 2 goto 1b;"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/livepatch_trampoline.c b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c
new file mode 100644
index 000000000000..15579d5bcd91
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int fentry_hit;
+int fexit_hit;
+int my_pid;
+
+SEC("fentry/cmdline_proc_show")
+int BPF_PROG(fentry_cmdline)
+{
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32)) /* upper 32 bits must equal my_pid */
+		return 0;
+
+	fentry_hit = 1; /* record that the fentry program ran for my_pid */
+	return 0;
+}
+
+SEC("fexit/cmdline_proc_show")
+int BPF_PROG(fexit_cmdline)
+{
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32)) /* upper 32 bits must equal my_pid */
+		return 0;
+
+	fexit_hit = 1; /* record that the fexit program ran for my_pid */
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index 0c13b7409947..7de173daf27b 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -89,14 +89,16 @@ SEC("lsm/file_mprotect")
 int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
 	     unsigned long reqprot, unsigned long prot, int ret)
 {
-	if (ret != 0)
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (ret != 0 || !mm)
 		return ret;
 
 	__s32 pid = bpf_get_current_pid_tgid() >> 32;
 	int is_stack = 0;
 
-	is_stack = (vma->vm_start <= vma->vm_mm->start_stack &&
-		    vma->vm_end >= vma->vm_mm->start_stack);
+	is_stack = (vma->vm_start <= mm->start_stack &&
+		    vma->vm_end >= mm->start_stack);
 
 	if (is_stack && monitored_pid == pid) {
 		mprotect_count++;
diff --git a/tools/testing/selftests/bpf/progs/lsm_tailcall.c b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
index 49c075ce2d4c..6e7e58051e64 100644
--- a/tools/testing/selftests/bpf/progs/lsm_tailcall.c
+++ b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
@@ -20,14 +20,14 @@ int lsm_file_permission_prog(void *ctx)
 	return 0;
 }
 
-SEC("lsm/file_alloc_security")
-int lsm_file_alloc_security_prog(void *ctx)
+SEC("lsm/kernfs_init_security")
+int lsm_kernfs_init_security_prog(void *ctx)
 {
 	return 0;
 }
 
-SEC("lsm/file_alloc_security")
-int lsm_file_alloc_security_entry(void *ctx)
+SEC("lsm/kernfs_init_security")
+int lsm_kernfs_init_security_entry(void *ctx)
 {
 	bpf_tail_call_static(ctx, &jmp_table, 0);
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sockmap.c b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c
new file mode 100644
index 000000000000..d4eef0cbadb9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+int sk_index;
+int redirect_idx;
+int trace_port;
+int helper_ret;
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+	__uint(max_entries, 100);
+} sock_map SEC(".maps");
+
+SEC("sockops")
+int mptcp_sockmap_inject(struct bpf_sock_ops *skops)
+{
+	struct bpf_sock *sk;
+
+	/* only handle BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB on local port trace_port */
+	if (skops->local_port != trace_port ||
+	    skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
+		return 1;
+
+	sk = skops->sk;
+	if (!sk) /* no socket attached to this event: nothing to add */
+		return 1;
+
+	/* add the socket at sock_map[sk_index] with BPF_NOEXIST; result is kept in helper_ret */
+	helper_ret = bpf_sock_map_update(skops, &sock_map, &sk_index, BPF_NOEXIST);
+
+	return 1;
+}
+
+SEC("sk_skb/stream_verdict")
+int mptcp_sockmap_redirect(struct __sk_buff *skb)
+{
+	/* redirect skb to the socket stored at sock_map[redirect_idx], flags = 0 */
+	return bpf_sk_redirect_map(skb, &sock_map, redirect_idx, 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 3a868a199349..d70c28824bbe 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -278,6 +278,46 @@ out:
 	return 0;
 }
 
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_1(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	/* nested rcu read lock regions: two locks here, three unlocks below */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	if (!real_parent)
+		goto out;
+	(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_unlock(); /* one unlock more than locks taken */
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_2(void *ctx)
+{
+	struct task_struct *task, *real_parent;
+
+	/* nested rcu read lock regions: three locks here, two unlocks below */
+	task = bpf_get_current_task_btf();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_lock();
+	bpf_rcu_read_lock();
+	real_parent = task->real_parent;
+	if (!real_parent)
+		goto out;
+	(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+	bpf_rcu_read_unlock();
+	bpf_rcu_read_unlock(); /* one lock is still held when the program returns */
+	return 0;
+}
+
 SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
 int task_trusted_non_rcuptr(void *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
index 893a4fdb4b6e..1aca85d86aeb 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -568,4 +568,64 @@ err_out:
 	return 0;
 }
 
+private(kptr_ref) u64 ref;
+
+static int probe_read_refcount(void)
+{
+	u32 refcount;
+
+	bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref); /* ref: address saved by __insert_in_list() */
+	return refcount; /* NOTE(review): the helper's return value is not checked here */
+}
+
+static int __insert_in_list(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+			    struct node_data __kptr **node)
+{
+	struct node_data *node_new, *node_ref, *node_old;
+
+	node_new = bpf_obj_new(typeof(*node_new));
+	if (!node_new)
+		return -1; /* allocation failed */
+
+	node_ref = bpf_refcount_acquire(node_new); /* second reference to the same object */
+	node_old = bpf_kptr_xchg(node, node_new); /* node_new moves into *node; previous value comes back */
+	if (node_old) {
+		bpf_obj_drop(node_old); /* slot was already occupied: drop both and bail out */
+		bpf_obj_drop(node_ref);
+		return -2;
+	}
+
+	bpf_spin_lock(lock);
+	bpf_list_push_front(head, &node_ref->l);
+	ref = (u64)(void *) &node_ref->ref; /* remember the address of the ref field for probe_read_refcount() */
+	bpf_spin_unlock(lock);
+	return probe_read_refcount();
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+} percpu_hash SEC(".maps");
+
+SEC("tc")
+int percpu_hash_refcount_leak(void *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&percpu_hash, &key);
+	if (!v) /* no element for key 0: nothing to do */
+		return 0;
+
+	return __insert_in_list(&head, &lock, &v->node); /* returns what __insert_in_list() reports */
+}
+
+SEC("tc")
+int check_percpu_hash_refcount(void *ctx)
+{
+	return probe_read_refcount(); /* re-read the value at the address saved in ref */
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
index 6a468496f539..d96c7d1e8fc2 100644
--- a/tools/testing/selftests/bpf/progs/ringbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2020 Facebook
 
+#include <stdbool.h>
 #include <linux/bpf.h>
 #include <stdint.h>
 #include <bpf/bpf_helpers.h>
@@ -14,9 +15,11 @@ struct {
 
 const volatile int batch_cnt = 0;
 const volatile long use_output = 0;
+const volatile bool bench_producer = false;
 
 long sample_val = 42;
 long dropped __attribute__((aligned(128))) = 0;
+long hits __attribute__((aligned(128))) = 0;
 
 const volatile long wakeup_data_size = 0;
 
@@ -24,6 +27,9 @@ static __always_inline long get_flags()
 {
 	long sz;
 
+	if (bench_producer)
+		return BPF_RB_NO_WAKEUP;
+
 	if (!wakeup_data_size)
 		return 0;
 
@@ -47,6 +53,8 @@ int bench_ringbuf(void *ctx)
 				*sample = sample_val;
 				flags = get_flags();
 				bpf_ringbuf_submit(sample, flags);
+				if (bench_producer)
+					__sync_add_and_fetch(&hits, 1);
 			}
 		}
 	} else {
@@ -55,6 +63,9 @@ int bench_ringbuf(void *ctx)
 			if (bpf_ringbuf_output(&ringbuf, &sample_val,
 					       sizeof(sample_val), flags))
 				__sync_add_and_fetch(&dropped, 1);
+			else if (bench_producer)
+				__sync_add_and_fetch(&hits, 1);
+
 		}
 	}
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
new file mode 100644
index 000000000000..09a00d11ffcc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+extern int tcp_memory_per_cpu_fw_alloc __ksym;
+extern int udp_memory_per_cpu_fw_alloc __ksym;
+
+int nr_cpus;
+bool tcp_activated, udp_activated;
+long tcp_memory_allocated, udp_memory_allocated;
+
+struct sk_prot {
+	long *memory_allocated; /* running total updated by drain_memory_per_cpu_fw_alloc() */
+	int *memory_per_cpu_fw_alloc; /* per-CPU variable passed to bpf_per_cpu_ptr() */
+};
+
+static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx)
+{
+	int *memory_per_cpu_fw_alloc;
+
+	memory_per_cpu_fw_alloc = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i); /* CPU i's copy */
+	if (memory_per_cpu_fw_alloc) /* skipped when bpf_per_cpu_ptr() returns NULL for index i */
+		*sk_prot_ctx->memory_allocated += *memory_per_cpu_fw_alloc;
+
+	return 0;
+}
+
+static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc)
+{
+	struct sock *sk = bpf_core_cast(_sk, struct sock);
+	struct sk_prot sk_prot_ctx;
+	long memory_allocated;
+
+	/* net_aligned_data.{tcp,udp}_memory_allocated was not available. */
+	memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter; /* start from the protocol counter */
+
+	sk_prot_ctx.memory_allocated = &memory_allocated;
+	sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc;
+
+	bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0); /* add the per-CPU values for indexes 0..nr_cpus-1 */
+
+	return memory_allocated;
+}
+
+static void fentry_init_sock(struct sock *sk, bool *activated,
+			     long *memory_allocated, int *memory_per_cpu_fw_alloc)
+{
+	if (!*activated) /* do nothing unless the flag has been set */
+		return;
+
+	*memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc);
+	*activated = false; /* one-shot: clear the flag after taking the sample */
+}
+
+SEC("fentry/tcp_init_sock")
+int BPF_PROG(fentry_tcp_init_sock, struct sock *sk)
+{
+	fentry_init_sock(sk, &tcp_activated, /* TCP flavour: tcp_* flag, result and per-CPU ksym */
+			 &tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc);
+	return 0;
+}
+
+SEC("fentry/udp_init_sock")
+int BPF_PROG(fentry_udp_init_sock, struct sock *sk)
+{
+	fentry_init_sock(sk, &udp_activated, /* UDP flavour: udp_* flag, result and per-CPU ksym */
+			 &udp_memory_allocated, &udp_memory_per_cpu_fw_alloc);
+	return 0;
+}
+
+SEC("cgroup/sock_create")
+int sock_create(struct bpf_sock *ctx)
+{
+	int err, val = 1;
+
+	err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, /* set the option to 1 */
+			     &val, sizeof(val));
+	if (err)
+		goto err;
+
+	val = 0; /* reset so the read-back below cannot pass on a stale value */
+
+	err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, /* read the option back */
+			     &val, sizeof(val));
+	if (err)
+		goto err;
+
+	if (val != 1) { /* read-back does not match what was set */
+		err = -EINVAL;
+		goto err;
+	}
+
+	return 1;
+
+err:
+	bpf_set_retval(err); /* failure path: hand err to bpf_set_retval() and return 0 */
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
new file mode 100644
index 000000000000..a96c8150d7f5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH         127
+#endif
+
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 16384);
+	__type(key, __u32);
+	__type(value, stack_trace_t);
+} stackmap SEC(".maps");
+
+extern bool CONFIG_UNWINDER_ORC __kconfig __weak;
+
+/*
+ * This function exists only so that CONFIG_UNWINDER_ORC is
+ * referenced and therefore added to the object's BTF.
+ */
+int unused(void)
+{
+	return CONFIG_UNWINDER_ORC ? 0 : 1;
+}
+
+__u32 stack_key;
+
+SEC("kprobe.multi")
+int kprobe_multi_test(struct pt_regs *ctx)
+{
+	stack_key = bpf_get_stackid(ctx, &stackmap, 0); /* result (flags = 0) is published in stack_key */
+	return 0;
+}
+
+SEC("raw_tp/bpf_testmod_test_read")
+int rawtp_test(void *ctx)
+{
+	/* Skip ebpf program entry in the stack. NOTE(review): flags is 0 here - confirm where the skip happens. */
+	stack_key = bpf_get_stackid(ctx, &stackmap, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c
index b4a0d0cc8ec8..3662515f0107 100644
--- a/tools/testing/selftests/bpf/progs/stream_fail.c
+++ b/tools/testing/selftests/bpf/progs/stream_fail.c
@@ -10,7 +10,7 @@ SEC("syscall")
 __failure __msg("Possibly NULL pointer passed")
 int stream_vprintk_null_arg(void *ctx)
 {
-	bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0, NULL);
+	bpf_stream_vprintk_impl(BPF_STDOUT, "", NULL, 0, NULL);
 	return 0;
 }
 
@@ -18,7 +18,7 @@ SEC("syscall")
 __failure __msg("R3 type=scalar expected=")
 int stream_vprintk_scalar_arg(void *ctx)
 {
-	bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0, NULL);
+	bpf_stream_vprintk_impl(BPF_STDOUT, "", (void *)46, 0, NULL);
 	return 0;
 }
 
@@ -26,7 +26,7 @@ SEC("syscall")
 __failure __msg("arg#1 doesn't point to a const string")
 int stream_vprintk_string_arg(void *ctx)
 {
-	bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0, NULL);
+	bpf_stream_vprintk_impl(BPF_STDOUT, ctx, NULL, 0, NULL);
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
index 99d72c68f76a..826e6b6aff7e 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
@@ -45,8 +45,12 @@ SEC("syscall")  __retval(USER_PTR_ERR)int test_strcspn_null1(void *ctx) { return
 SEC("syscall")  __retval(USER_PTR_ERR)int test_strcspn_null2(void *ctx) { return bpf_strcspn("hello", NULL); }
 SEC("syscall")  __retval(USER_PTR_ERR)int test_strstr_null1(void *ctx) { return bpf_strstr(NULL, "hello"); }
 SEC("syscall")  __retval(USER_PTR_ERR)int test_strstr_null2(void *ctx) { return bpf_strstr("hello", NULL); }
+SEC("syscall")  __retval(USER_PTR_ERR)int test_strcasestr_null1(void *ctx) { return bpf_strcasestr(NULL, "hello"); }
+SEC("syscall")  __retval(USER_PTR_ERR)int test_strcasestr_null2(void *ctx) { return bpf_strcasestr("hello", NULL); }
 SEC("syscall")  __retval(USER_PTR_ERR)int test_strnstr_null1(void *ctx) { return bpf_strnstr(NULL, "hello", 1); }
 SEC("syscall")  __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return bpf_strnstr("hello", NULL, 1); }
+SEC("syscall")  __retval(USER_PTR_ERR)int test_strncasestr_null1(void *ctx) { return bpf_strncasestr(NULL, "hello", 1); }
+SEC("syscall")  __retval(USER_PTR_ERR)int test_strncasestr_null2(void *ctx) { return bpf_strncasestr("hello", NULL, 1); }
 
 /* Passing userspace ptr to string kfuncs */
 SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); }
@@ -65,8 +69,12 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr1(void *ctx) { re
 SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr2(void *ctx) { return bpf_strcspn("hello", user_ptr); }
 SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr1(void *ctx) { return bpf_strstr(user_ptr, "hello"); }
 SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr2(void *ctx) { return bpf_strstr("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasestr_user_ptr1(void *ctx) { return bpf_strcasestr(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasestr_user_ptr2(void *ctx) { return bpf_strcasestr("hello", user_ptr); }
 SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr1(void *ctx) { return bpf_strnstr(user_ptr, "hello", 1); }
 SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { return bpf_strnstr("hello", user_ptr, 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasestr_user_ptr1(void *ctx) { return bpf_strncasestr(user_ptr, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasestr_user_ptr2(void *ctx) { return bpf_strncasestr("hello", user_ptr, 1); }
 
 #endif /* __TARGET_ARCH_s390 */
 
@@ -87,7 +95,11 @@ SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault1(void *ctx) { return
 SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault2(void *ctx) { return bpf_strcspn("hello", invalid_kern_ptr); }
 SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault1(void *ctx) { return bpf_strstr(invalid_kern_ptr, "hello"); }
 SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault2(void *ctx) { return bpf_strstr("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcasestr_pagefault1(void *ctx) { return bpf_strcasestr(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcasestr_pagefault2(void *ctx) { return bpf_strcasestr("hello", invalid_kern_ptr); }
 SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault1(void *ctx) { return bpf_strnstr(invalid_kern_ptr, "hello", 1); }
 SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault2(void *ctx) { return bpf_strnstr("hello", invalid_kern_ptr, 1); }
+SEC("syscall") __retval(-EFAULT) int test_strncasestr_pagefault1(void *ctx) { return bpf_strncasestr(invalid_kern_ptr, "hello", 1); }
+SEC("syscall") __retval(-EFAULT) int test_strncasestr_pagefault2(void *ctx) { return bpf_strncasestr("hello", invalid_kern_ptr, 1); }
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
index e41cc5601994..05e1da1f250f 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
@@ -19,6 +19,8 @@ SEC("syscall") int test_strspn_accept_too_long(void *ctx) { return bpf_strspn("b
 SEC("syscall") int test_strcspn_str_too_long(void *ctx) { return bpf_strcspn(long_str, "b"); }
 SEC("syscall") int test_strcspn_reject_too_long(void *ctx) { return bpf_strcspn("b", long_str); }
 SEC("syscall") int test_strstr_too_long(void *ctx) { return bpf_strstr(long_str, "hello"); }
+SEC("syscall") int test_strcasestr_too_long(void *ctx) { return bpf_strcasestr(long_str, "hello"); }
 SEC("syscall") int test_strnstr_too_long(void *ctx) { return bpf_strnstr(long_str, "hello", sizeof(long_str)); }
+SEC("syscall") int test_strncasestr_too_long(void *ctx) { return bpf_strncasestr(long_str, "hello", sizeof(long_str)); }
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
index 2e3498e37b9c..a8513964516b 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
@@ -33,8 +33,11 @@ __test(11) int test_strnlen(void *ctx) { return bpf_strnlen(str, 12); }
 __test(5) int test_strspn(void *ctx) { return bpf_strspn(str, "ehlo"); }
 __test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); }
 __test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); }
+__test(6) int test_strcasestr_found(void *ctx) { return bpf_strcasestr(str, "woRLD"); }
 __test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); }
+__test(-ENOENT) int test_strcasestr_notfound(void *ctx) { return bpf_strcasestr(str, "hi"); }
 __test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); }
+__test(0) int test_strcasestr_empty(void *ctx) { return bpf_strcasestr(str, ""); }
 __test(0) int test_strnstr_found1(void *ctx) { return bpf_strnstr("", "", 0); }
 __test(0) int test_strnstr_found2(void *ctx) { return bpf_strnstr(str, "hello", 5); }
 __test(0) int test_strnstr_found3(void *ctx) { return bpf_strnstr(str, "hello", 6); }
@@ -42,5 +45,12 @@ __test(-ENOENT) int test_strnstr_notfound1(void *ctx) { return bpf_strnstr(str,
 __test(-ENOENT) int test_strnstr_notfound2(void *ctx) { return bpf_strnstr(str, "hello", 4); }
 __test(-ENOENT) int test_strnstr_notfound3(void *ctx) { return bpf_strnstr("", "a", 0); }
 __test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); }
+__test(0) int test_strncasestr_found1(void *ctx) { return bpf_strncasestr("", "", 0); }
+__test(0) int test_strncasestr_found2(void *ctx) { return bpf_strncasestr(str, "heLLO", 5); }
+__test(0) int test_strncasestr_found3(void *ctx) { return bpf_strncasestr(str, "heLLO", 6); }
+__test(-ENOENT) int test_strncasestr_notfound1(void *ctx) { return bpf_strncasestr(str, "hi", 10); }
+__test(-ENOENT) int test_strncasestr_notfound2(void *ctx) { return bpf_strncasestr(str, "hello", 4); }
+__test(-ENOENT) int test_strncasestr_notfound3(void *ctx) { return bpf_strncasestr("", "a", 0); }
+__test(0) int test_strncasestr_empty(void *ctx) { return bpf_strncasestr(str, "", 1); }
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index a5c74d31a244..6e1918deaf26 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -330,9 +330,9 @@ static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
 	}
 	bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
 	/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
-	return tls_ptr && tls_ptr != (void *)-1
-		? tls_ptr + tls_index.offset
-		: NULL;
+	if (!tls_ptr || tls_ptr == (void *)-1)
+		return NULL;
+	return tls_ptr + tls_index.offset;
 }
 
 #ifdef SUBPROGS
diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c
index 23217f06a3ec..663a80990f8f 100644
--- a/tools/testing/selftests/bpf/progs/task_work.c
+++ b/tools/testing/selftests/bpf/progs/task_work.c
@@ -66,7 +66,7 @@ int oncpu_hash_map(struct pt_regs *args)
 	if (!work)
 		return 0;
 
-	bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL);
+	bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
 	return 0;
 }
 
@@ -80,7 +80,7 @@ int oncpu_array_map(struct pt_regs *args)
 	work = bpf_map_lookup_elem(&arrmap, &key);
 	if (!work)
 		return 0;
-	bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work, NULL);
+	bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, process_work, NULL);
 	return 0;
 }
 
@@ -102,6 +102,6 @@ int oncpu_lru_map(struct pt_regs *args)
 	work = bpf_map_lookup_elem(&lrumap, &key);
 	if (!work || work->data[0])
 		return 0;
-	bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work, NULL);
+	bpf_task_work_schedule_resume_impl(task, &work->tw, &lrumap, process_work, NULL);
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c
index 77fe8f28facd..1270953fd092 100644
--- a/tools/testing/selftests/bpf/progs/task_work_fail.c
+++ b/tools/testing/selftests/bpf/progs/task_work_fail.c
@@ -53,7 +53,7 @@ int mismatch_map(struct pt_regs *args)
 	work = bpf_map_lookup_elem(&arrmap, &key);
 	if (!work)
 		return 0;
-	bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL);
+	bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
 	return 0;
 }
 
@@ -65,7 +65,7 @@ int no_map_task_work(struct pt_regs *args)
 	struct bpf_task_work tw;
 
 	task = bpf_get_current_task_btf();
-	bpf_task_work_schedule_resume(task, &tw, &hmap, process_work, NULL);
+	bpf_task_work_schedule_resume_impl(task, &tw, &hmap, process_work, NULL);
 	return 0;
 }
 
@@ -76,7 +76,7 @@ int task_work_null(struct pt_regs *args)
 	struct task_struct *task;
 
 	task = bpf_get_current_task_btf();
-	bpf_task_work_schedule_resume(task, NULL, &hmap, process_work, NULL);
+	bpf_task_work_schedule_resume_impl(task, NULL, &hmap, process_work, NULL);
 	return 0;
 }
 
@@ -91,6 +91,6 @@ int map_null(struct pt_regs *args)
 	work = bpf_map_lookup_elem(&arrmap, &key);
 	if (!work)
 		return 0;
-	bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work, NULL);
+	bpf_task_work_schedule_resume_impl(task, &work->tw, NULL, process_work, NULL);
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c
index 90fca06fff56..55e555f7f41b 100644
--- a/tools/testing/selftests/bpf/progs/task_work_stress.c
+++ b/tools/testing/selftests/bpf/progs/task_work_stress.c
@@ -51,8 +51,8 @@ int schedule_task_work(void *ctx)
 		if (!work)
 			return 0;
 	}
-	err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap,
-					    process_work, NULL);
+	err = bpf_task_work_schedule_signal_impl(bpf_get_current_task_btf(), &work->tw, &hmap,
+						 process_work, NULL);
 	if (err)
 		__sync_fetch_and_add(&schedule_error, 1);
 	else
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
index a58b5194fc89..022291f21dfb 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
@@ -8,8 +8,6 @@ char _license[] SEC("license") = "GPL";
 
 #define USEC_PER_SEC 1000000UL
 
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
 static unsigned int tcp_left_out(const struct tcp_sock *tp)
 {
 	return tp->sacked_out + tp->lost_out;
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index 2ec1de11a3ae..7b6b2b342c1d 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -7,6 +7,7 @@
 
 #include <stddef.h>
 #include <stdint.h>
+#include <errno.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -288,3 +289,14 @@ int tc_input_len_exceed(struct __sk_buff *ctx)
 	global_bpf_mtu_xdp = mtu_len;
 	return retval;
 }
+
+SEC("tc")
+int tc_chk_segs_flag(struct __sk_buff *ctx)
+{
+	__u32 mtu_len = 0;
+	int err;
+
+	err = bpf_check_mtu(ctx, GLOBAL_USER_IFINDEX, &mtu_len, 0, BPF_MTU_CHK_SEGS); /* flags = BPF_MTU_CHK_SEGS */
+
+	return err == -EINVAL ? BPF_OK : BPF_DROP; /* BPF_OK only when the helper returned -EINVAL */
+}
diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c
index a1ccc831c882..05ac9410cd68 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_branches.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c
@@ -8,6 +8,7 @@
 #include <bpf/bpf_tracing.h>
 
 int valid = 0;
+int run_cnt = 0;
 int required_size_out = 0;
 int written_stack_out = 0;
 int written_global_out = 0;
@@ -24,6 +25,8 @@ int perf_branches(void *ctx)
 	__u64 entries[4 * 3] = {0};
 	int required_size, written_stack, written_global;
 
+	++run_cnt;
+
 	/* write to stack */
 	written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0);
 	/* ignore spurious events */
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c
new file mode 100644
index 000000000000..ff4aa67ddacc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(map_flags, BPF_F_RB_OVERWRITE);
+} ringbuf SEC(".maps");
+
+int pid;
+
+const volatile unsigned long LEN1;
+const volatile unsigned long LEN2;
+const volatile unsigned long LEN3;
+const volatile unsigned long LEN4;
+const volatile unsigned long LEN5;
+
+long reserve1_fail = 0;
+long reserve2_fail = 0;
+long reserve3_fail = 0;
+long reserve4_fail = 0;
+long reserve5_fail = 0;
+
+unsigned long avail_data = 0;
+unsigned long ring_size = 0;
+unsigned long cons_pos = 0;
+unsigned long prod_pos = 0;
+unsigned long over_pos = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_overwrite_ringbuf(void *ctx) /* reserve/submit records rec1..rec5, then publish ring state */
+{
+	char *rec1, *rec2, *rec3, *rec4, *rec5;
+	int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+	if (cur_pid != pid) /* only run for the process whose pid was stored in 'pid' */
+		return 0;
+
+	rec1 = bpf_ringbuf_reserve(&ringbuf, LEN1, 0);
+	if (!rec1) {
+		reserve1_fail = 1;
+		return 0;
+	}
+
+	rec2 = bpf_ringbuf_reserve(&ringbuf, LEN2, 0);
+	if (!rec2) {
+		bpf_ringbuf_discard(rec1, 0);
+		reserve2_fail = 1;
+		return 0;
+	}
+
+	rec3 = bpf_ringbuf_reserve(&ringbuf, LEN3, 0);
+	/* expect failure: rec1 and rec2 are still reserved but not yet submitted */
+	if (!rec3) {
+		reserve3_fail = 1;
+	} else {
+		bpf_ringbuf_discard(rec1, 0);
+		bpf_ringbuf_discard(rec2, 0);
+		bpf_ringbuf_discard(rec3, 0);
+		return 0;
+	}
+
+	rec4 = bpf_ringbuf_reserve(&ringbuf, LEN4, 0);
+	if (!rec4) {
+		reserve4_fail = 1;
+		bpf_ringbuf_discard(rec1, 0);
+		bpf_ringbuf_discard(rec2, 0);
+		return 0;
+	}
+
+	bpf_ringbuf_submit(rec1, 0);
+	bpf_ringbuf_submit(rec2, 0);
+	bpf_ringbuf_submit(rec4, 0);
+
+	rec5 = bpf_ringbuf_reserve(&ringbuf, LEN5, 0);
+	if (!rec5) {
+		reserve5_fail = 1;
+		return 0;
+	}
+
+	for (int i = 0; i < LEN5; i++) /* bound by LEN5, the size rec5 was reserved with */
+		rec5[i] = 0xdd;
+
+	bpf_ringbuf_submit(rec5, 0);
+
+	ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE); /* export ring state through globals */
+	avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+	cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS);
+	prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS);
+	over_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_OVERWRITE_POS);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c
index 950a70b61e74..4f6f03122d61 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_edt.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c
@@ -14,7 +14,6 @@
 #define TIME_HORIZON_NS (2000 * 1000 * 1000)
 #define NS_PER_SEC 1000000000
 #define ECN_HORIZON_NS 5000000
-#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
 
 /* flow_key => last_tstamp timestamp used */
 struct {
@@ -24,12 +23,13 @@ struct {
 	__uint(max_entries, 1);
 } flow_map SEC(".maps");
 
+__uint64_t target_rate;
+
 static inline int throttle_flow(struct __sk_buff *skb)
 {
 	int key = 0;
 	uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key);
-	uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC /
-			THROTTLE_RATE_BPS;
+	uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC / target_rate;
 	uint64_t now = bpf_ktime_get_ns();
 	uint64_t tstamp, next_tstamp = 0;
 
@@ -70,7 +70,7 @@ static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp)
 	if ((void *)(tcp + 1) > data_end)
 		return TC_ACT_SHOT;
 
-	if (tcp->dest == bpf_htons(9000))
+	if (tcp->source == bpf_htons(9000))
 		return throttle_flow(skb);
 
 	return TC_ACT_OK;
@@ -99,7 +99,8 @@ static inline int handle_ipv4(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("cls_test") int tc_prog(struct __sk_buff *skb)
+SEC("tc")
+int tc_prog(struct __sk_buff *skb)
 {
 	if (skb->protocol == bpf_htons(ETH_P_IP))
 		return handle_ipv4(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 404124a93892..7330c61b5730 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -2,23 +2,11 @@
 
 /* In-place tunneling */
 
-#include <stdbool.h>
-#include <string.h>
-
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/mpls.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/pkt_cls.h>
-#include <linux/types.h>
+#include <vmlinux.h>
 
-#include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
 #include "bpf_compiler.h"
 
 #pragma GCC diagnostic ignored "-Waddress-of-packed-member"
@@ -27,6 +15,14 @@ static const int cfg_port = 8000;
 
 static const int cfg_udp_src = 20000;
 
+#define ETH_P_MPLS_UC	0x8847
+#define ETH_P_TEB	0x6558
+
+#define MPLS_LS_S_MASK	0x00000100
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len)			\
+	(((__u64)len & BPF_ADJ_ROOM_ENCAP_L2_MASK)	\
+	 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
+
 #define	L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)
 
 #define	UDP_PORT		5555
@@ -36,10 +32,9 @@ static const int cfg_udp_src = 20000;
 
 #define	EXTPROTO_VXLAN	0x1
 
-#define	VXLAN_N_VID     (1u << 24)
-#define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
-#define	VXLAN_FLAGS     0x8
-#define	VXLAN_VNI       1
+#define	VXLAN_FLAGS     bpf_htonl(1<<27)
+#define	VNI_ID		1
+#define	VXLAN_VNI	bpf_htonl(VNI_ID << 8)
 
 #ifndef NEXTHDR_DEST
 #define NEXTHDR_DEST	60
@@ -48,12 +43,6 @@ static const int cfg_udp_src = 20000;
 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
 						     MPLS_LS_S_MASK | 0xff);
-
-struct vxlanhdr {
-	__be32 vx_flags;
-	__be32 vx_vni;
-} __attribute__((packed));
-
 struct gre_hdr {
 	__be16 flags;
 	__be16 protocol;
@@ -94,8 +83,8 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
 static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 					__u16 l2_proto, __u16 ext_proto)
 {
+	struct iphdr iph_inner = {0};
 	__u16 udp_dst = UDP_PORT;
-	struct iphdr iph_inner;
 	struct v4hdr h_outer;
 	struct tcphdr tcph;
 	int olen, l2_len;
@@ -122,7 +111,6 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 			return TC_ACT_OK;
 
 		/* Derive the IPv4 header fields from the IPv6 header */
-		memset(&iph_inner, 0, sizeof(iph_inner));
 		iph_inner.version = 4;
 		iph_inner.ihl = 5;
 		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
@@ -210,7 +198,7 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
 			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
 
 			vxlan_hdr->vx_flags = VXLAN_FLAGS;
-			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+			vxlan_hdr->vx_vni = VXLAN_VNI;
 
 			l2_hdr += sizeof(struct vxlanhdr);
 		}
@@ -340,7 +328,7 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
 
 			vxlan_hdr->vx_flags = VXLAN_FLAGS;
-			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+			vxlan_hdr->vx_vni = VXLAN_VNI;
 
 			l2_hdr += sizeof(struct vxlanhdr);
 		}
@@ -372,8 +360,8 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 
 static int encap_ipv6_ipip6(struct __sk_buff *skb)
 {
+	struct v6hdr h_outer = {0};
 	struct iphdr iph_inner;
-	struct v6hdr h_outer;
 	struct tcphdr tcph;
 	struct ethhdr eth;
 	__u64 flags;
@@ -400,13 +388,12 @@ static int encap_ipv6_ipip6(struct __sk_buff *skb)
 		return TC_ACT_SHOT;
 
 	/* prepare new outer network header */
-	memset(&h_outer.ip, 0, sizeof(h_outer.ip));
 	h_outer.ip.version = 6;
 	h_outer.ip.hop_limit = iph_inner.ttl;
-	h_outer.ip.saddr.s6_addr[1] = 0xfd;
-	h_outer.ip.saddr.s6_addr[15] = 1;
-	h_outer.ip.daddr.s6_addr[1] = 0xfd;
-	h_outer.ip.daddr.s6_addr[15] = 2;
+	h_outer.ip.saddr.in6_u.u6_addr8[1] = 0xfd;
+	h_outer.ip.saddr.in6_u.u6_addr8[15] = 1;
+	h_outer.ip.daddr.in6_u.u6_addr8[1] = 0xfd;
+	h_outer.ip.daddr.in6_u.u6_addr8[15] = 2;
 	h_outer.ip.payload_len = iph_inner.tot_len;
 	h_outer.ip.nexthdr = IPPROTO_IPIP;
 
@@ -431,7 +418,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
 	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
 }
 
-SEC("encap_ipip_none")
+SEC("tc")
 int __encap_ipip_none(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -440,7 +427,7 @@ int __encap_ipip_none(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_gre_none")
+SEC("tc")
 int __encap_gre_none(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -449,7 +436,7 @@ int __encap_gre_none(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_gre_mpls")
+SEC("tc")
 int __encap_gre_mpls(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -458,7 +445,7 @@ int __encap_gre_mpls(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_gre_eth")
+SEC("tc")
 int __encap_gre_eth(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -467,7 +454,7 @@ int __encap_gre_eth(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_udp_none")
+SEC("tc")
 int __encap_udp_none(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -476,7 +463,7 @@ int __encap_udp_none(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_udp_mpls")
+SEC("tc")
 int __encap_udp_mpls(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -485,7 +472,7 @@ int __encap_udp_mpls(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_udp_eth")
+SEC("tc")
 int __encap_udp_eth(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -494,7 +481,7 @@ int __encap_udp_eth(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_vxlan_eth")
+SEC("tc")
 int __encap_vxlan_eth(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -505,7 +492,7 @@ int __encap_vxlan_eth(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_sit_none")
+SEC("tc")
 int __encap_sit_none(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -514,7 +501,7 @@ int __encap_sit_none(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_ip6tnl_none")
+SEC("tc")
 int __encap_ip6tnl_none(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -523,7 +510,7 @@ int __encap_ip6tnl_none(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_ipip6_none")
+SEC("tc")
 int __encap_ipip6_none(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -532,7 +519,7 @@ int __encap_ipip6_none(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_ip6gre_none")
+SEC("tc")
 int __encap_ip6gre_none(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -541,7 +528,7 @@ int __encap_ip6gre_none(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_ip6gre_mpls")
+SEC("tc")
 int __encap_ip6gre_mpls(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -550,7 +537,7 @@ int __encap_ip6gre_mpls(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_ip6gre_eth")
+SEC("tc")
 int __encap_ip6gre_eth(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -559,7 +546,7 @@ int __encap_ip6gre_eth(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_ip6udp_none")
+SEC("tc")
 int __encap_ip6udp_none(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -568,7 +555,7 @@ int __encap_ip6udp_none(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_ip6udp_mpls")
+SEC("tc")
 int __encap_ip6udp_mpls(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -577,7 +564,7 @@ int __encap_ip6udp_mpls(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_ip6udp_eth")
+SEC("tc")
 int __encap_ip6udp_eth(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -586,7 +573,7 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
 		return TC_ACT_OK;
 }
 
-SEC("encap_ip6vxlan_eth")
+SEC("tc")
 int __encap_ip6vxlan_eth(struct __sk_buff *skb)
 {
 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -693,7 +680,7 @@ static int decap_ipv6(struct __sk_buff *skb)
 			      iph_outer.nexthdr);
 }
 
-SEC("decap")
+SEC("tc")
 int decap_f(struct __sk_buff *skb)
 {
 	switch (skb->protocol) {
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index d79cb74b571e..0a0f371a2dec 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -4,6 +4,7 @@
 #include <linux/if_ether.h>
 #include <linux/pkt_cls.h>
 
+#include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
 #include "bpf_kfuncs.h"
 
@@ -11,37 +12,72 @@
 
 #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
 
-/* Demonstrates how metadata can be passed from an XDP program to a TC program
- * using bpf_xdp_adjust_meta.
- * For the sake of testing the metadata support in drivers, the XDP program uses
- * a fixed-size payload after the Ethernet header as metadata. The TC program
- * copies the metadata it receives into a map so it can be checked from
- * userspace.
+/* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta.
+ *
+ * The XDP program extracts a fixed-size payload following the Ethernet header
+ * and stores it as packet metadata to test the driver's metadata support. The
+ * TC program then verifies if the passed metadata is correct.
  */
 
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 1);
-	__type(key, __u32);
-	__uint(value_size, META_SIZE);
-} test_result SEC(".maps");
-
 bool test_pass;
 
+static const __u8 smac_want[ETH_ALEN] = {
+	0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF,
+};
+
+static const __u8 meta_want[META_SIZE] = {
+	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+	0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+	0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+};
+
+static bool check_smac(const struct ethhdr *eth)
+{
+	return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN);
+}
+
+static bool check_metadata(const char *file, int line, __u8 *meta_have)
+{
+	if (!__builtin_memcmp(meta_have, meta_want, META_SIZE))
+		return true;
+
+	bpf_stream_printk(BPF_STREAM_STDERR,
+			  "FAIL:%s:%d: metadata mismatch\n"
+			  "  have:\n    %pI6\n    %pI6\n"
+			  "  want:\n    %pI6\n    %pI6\n",
+			  file, line,
+			  &meta_have[0x00], &meta_have[0x10],
+			  &meta_want[0x00], &meta_want[0x10]);
+	return false;
+}
+
+#define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have)
+
+static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb)
+{
+	__u8 *data_meta = ctx_ptr(skb, data_meta);
+	__u8 *data = ctx_ptr(skb, data);
+
+	return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta);
+}
+
+#define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb)
+
 SEC("tc")
 int ing_cls(struct __sk_buff *ctx)
 {
-	__u8 *data, *data_meta;
-	__u32 key = 0;
-
-	data_meta = ctx_ptr(ctx, data_meta);
-	data      = ctx_ptr(ctx, data);
+	__u8 *meta_have = ctx_ptr(ctx, data_meta);
+	__u8 *data = ctx_ptr(ctx, data);
 
-	if (data_meta + META_SIZE > data)
-		return TC_ACT_SHOT;
+	if (meta_have + META_SIZE > data)
+		goto out;
 
-	bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY);
+	if (!check_metadata(meta_have))
+		goto out;
 
+	test_pass = true;
+out:
 	return TC_ACT_SHOT;
 }
 
@@ -49,17 +85,17 @@ int ing_cls(struct __sk_buff *ctx)
 SEC("tc")
 int ing_cls_dynptr_read(struct __sk_buff *ctx)
 {
+	__u8 meta_have[META_SIZE];
 	struct bpf_dynptr meta;
-	const __u32 zero = 0;
-	__u8 *dst;
-
-	dst = bpf_map_lookup_elem(&test_result, &zero);
-	if (!dst)
-		return TC_ACT_SHOT;
 
 	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
-	bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0);
+	bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
 
+	if (!check_metadata(meta_have))
+		goto out;
+
+	test_pass = true;
+out:
 	return TC_ACT_SHOT;
 }
 
@@ -86,20 +122,18 @@ SEC("tc")
 int ing_cls_dynptr_slice(struct __sk_buff *ctx)
 {
 	struct bpf_dynptr meta;
-	const __u32 zero = 0;
-	__u8 *dst, *src;
-
-	dst = bpf_map_lookup_elem(&test_result, &zero);
-	if (!dst)
-		return TC_ACT_SHOT;
+	__u8 *meta_have;
 
 	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
-	src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
-	if (!src)
-		return TC_ACT_SHOT;
+	meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
+	if (!meta_have)
+		goto out;
 
-	__builtin_memcpy(dst, src, META_SIZE);
+	if (!check_metadata(meta_have))
+		goto out;
 
+	test_pass = true;
+out:
 	return TC_ACT_SHOT;
 }
 
@@ -129,14 +163,12 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
 SEC("tc")
 int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
 {
-	struct bpf_dynptr meta;
 	const __u32 chunk_len = META_SIZE / 4;
-	const __u32 zero = 0;
+	__u8 meta_have[META_SIZE];
+	struct bpf_dynptr meta;
 	__u8 *dst, *src;
 
-	dst = bpf_map_lookup_elem(&test_result, &zero);
-	if (!dst)
-		return TC_ACT_SHOT;
+	dst = meta_have;
 
 	/* 1. Regular read */
 	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
@@ -155,9 +187,14 @@ int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
 	/* 4. Read from a slice starting at an offset */
 	src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
 	if (!src)
-		return TC_ACT_SHOT;
+		goto out;
 	__builtin_memcpy(dst, src, chunk_len);
 
+	if (!check_metadata(meta_have))
+		goto out;
+
+	test_pass = true;
+out:
 	return TC_ACT_SHOT;
 }
 
@@ -254,7 +291,7 @@ int ing_xdp_zalloc_meta(struct xdp_md *ctx)
 	/* Drop any non-test packets */
 	if (eth + 1 > ctx_ptr(ctx, data_end))
 		return XDP_DROP;
-	if (eth->h_proto != 0)
+	if (!check_smac(eth))
 		return XDP_DROP;
 
 	ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
@@ -294,9 +331,9 @@ int ing_xdp(struct xdp_md *ctx)
 
 	/* The Linux networking stack may send other packets on the test
 	 * interface that interfere with the test. Just drop them.
-	 * The test packets can be recognized by their ethertype of zero.
+	 * The test packets can be recognized by their source MAC address.
 	 */
-	if (eth->h_proto != 0)
+	if (!check_smac(eth))
 		return XDP_DROP;
 
 	__builtin_memcpy(data_meta, payload, META_SIZE);
@@ -304,22 +341,25 @@ int ing_xdp(struct xdp_md *ctx)
 }
 
 /*
- * Check that skb->data_meta..skb->data is empty if prog writes to packet
- * _payload_ using packet pointers. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
+ * kept intact if prog writes to packet _payload_ using packet pointers.
  */
 SEC("tc")
-int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx)
+int clone_data_meta_survives_data_write(struct __sk_buff *ctx)
 {
+	__u8 *meta_have = ctx_ptr(ctx, data_meta);
 	struct ethhdr *eth = ctx_ptr(ctx, data);
 
 	if (eth + 1 > ctx_ptr(ctx, data_end))
 		goto out;
 	/* Ignore non-test packets */
-	if (eth->h_proto != 0)
+	if (!check_smac(eth))
+		goto out;
+
+	if (meta_have + META_SIZE > eth)
 		goto out;
 
-	/* Expect no metadata */
-	if (ctx->data_meta != ctx->data)
+	if (!check_metadata(meta_have))
 		goto out;
 
 	/* Packet write to trigger unclone in prologue */
@@ -331,40 +371,44 @@ out:
 }
 
 /*
- * Check that skb->data_meta..skb->data is empty if prog writes to packet
- * _metadata_ using packet pointers. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
+ * kept intact if prog writes to packet _metadata_ using packet pointers.
  */
 SEC("tc")
-int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx)
+int clone_data_meta_survives_meta_write(struct __sk_buff *ctx)
 {
+	__u8 *meta_have = ctx_ptr(ctx, data_meta);
 	struct ethhdr *eth = ctx_ptr(ctx, data);
-	__u8 *md = ctx_ptr(ctx, data_meta);
 
 	if (eth + 1 > ctx_ptr(ctx, data_end))
 		goto out;
 	/* Ignore non-test packets */
-	if (eth->h_proto != 0)
+	if (!check_smac(eth))
 		goto out;
 
-	if (md + 1 > ctx_ptr(ctx, data)) {
-		/* Expect no metadata */
-		test_pass = true;
-	} else {
-		/* Metadata write to trigger unclone in prologue */
-		*md = 42;
-	}
+	if (meta_have + META_SIZE > eth)
+		goto out;
+
+	if (!check_metadata(meta_have))
+		goto out;
+
+	/* Metadata write to trigger unclone in prologue */
+	*meta_have = 42;
+
+	test_pass = true;
 out:
 	return TC_ACT_SHOT;
 }
 
 /*
- * Check that skb_meta dynptr is writable but empty if prog writes to packet
- * _payload_ using a dynptr slice. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, metadata remains intact if
+ * prog creates an r/w slice to packet _payload_.
  */
 SEC("tc")
-int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx)
+int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx)
 {
 	struct bpf_dynptr data, meta;
+	__u8 meta_have[META_SIZE];
 	struct ethhdr *eth;
 
 	bpf_dynptr_from_skb(ctx, 0, &data);
@@ -372,51 +416,45 @@ int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx)
 	if (!eth)
 		goto out;
 	/* Ignore non-test packets */
-	if (eth->h_proto != 0)
+	if (!check_smac(eth))
 		goto out;
 
-	/* Expect no metadata */
 	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
-	if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
+	bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+	if (!check_metadata(meta_have))
 		goto out;
 
-	/* Packet write to trigger unclone in prologue */
-	eth->h_proto = 42;
-
 	test_pass = true;
 out:
 	return TC_ACT_SHOT;
 }
 
 /*
- * Check that skb_meta dynptr is writable but empty if prog writes to packet
- * _metadata_ using a dynptr slice. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, metadata remains intact if
+ * prog creates an r/w slice to packet _metadata_.
  */
 SEC("tc")
-int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx)
+int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx)
 {
 	struct bpf_dynptr data, meta;
 	const struct ethhdr *eth;
-	__u8 *md;
+	__u8 *meta_have;
 
 	bpf_dynptr_from_skb(ctx, 0, &data);
 	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
 	if (!eth)
 		goto out;
 	/* Ignore non-test packets */
-	if (eth->h_proto != 0)
+	if (!check_smac(eth))
 		goto out;
 
-	/* Expect no metadata */
 	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
-	if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
+	meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
+	if (!meta_have)
 		goto out;
 
-	/* Metadata write to trigger unclone in prologue */
-	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
-	md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
-	if (md)
-		*md = 42;
+	if (!check_metadata(meta_have))
+		goto out;
 
 	test_pass = true;
 out:
@@ -424,34 +462,40 @@ out:
 }
 
 /*
- * Check that skb_meta dynptr is read-only before prog writes to packet payload
- * using dynptr_write helper. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
+ * before prog writes to packet _payload_ using dynptr_write helper and metadata
+ * remains intact before and after the write.
  */
 SEC("tc")
-int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx)
+int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx)
 {
 	struct bpf_dynptr data, meta;
+	__u8 meta_have[META_SIZE];
 	const struct ethhdr *eth;
+	int err;
 
 	bpf_dynptr_from_skb(ctx, 0, &data);
 	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
 	if (!eth)
 		goto out;
 	/* Ignore non-test packets */
-	if (eth->h_proto != 0)
+	if (!check_smac(eth))
 		goto out;
 
-	/* Expect read-only metadata before unclone */
+	/* Expect read-write metadata before unclone */
 	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
-	if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
+	if (bpf_dynptr_is_rdonly(&meta))
+		goto out;
+
+	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+	if (err || !check_metadata(meta_have))
 		goto out;
 
 	/* Helper write to payload will unclone the packet */
 	bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);
 
-	/* Expect no metadata after unclone */
-	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
-	if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0)
+	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+	if (err || !check_metadata(meta_have))
 		goto out;
 
 	test_pass = true;
@@ -460,31 +504,165 @@ out:
 }
 
 /*
- * Check that skb_meta dynptr is read-only if prog writes to packet
- * metadata using dynptr_write helper. Applies only to cloned skbs.
+ * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
+ * before prog writes to packet _metadata_ using dynptr_write helper and
+ * metadata remains intact before and after the write.
  */
 SEC("tc")
-int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx)
+int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx)
 {
 	struct bpf_dynptr data, meta;
+	__u8 meta_have[META_SIZE];
 	const struct ethhdr *eth;
+	int err;
 
 	bpf_dynptr_from_skb(ctx, 0, &data);
 	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
 	if (!eth)
 		goto out;
 	/* Ignore non-test packets */
-	if (eth->h_proto != 0)
+	if (!check_smac(eth))
 		goto out;
 
-	/* Expect read-only metadata */
+	/* Expect read-write metadata before unclone */
 	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
-	if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
+	if (bpf_dynptr_is_rdonly(&meta))
 		goto out;
 
-	/* Metadata write. Expect failure. */
-	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
-	if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL)
+	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+	if (err || !check_metadata(meta_have))
+		goto out;
+
+	/* Helper write to metadata will unclone the packet */
+	bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0);
+
+	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+	if (err || !check_metadata(meta_have))
+		goto out;
+
+	test_pass = true;
+out:
+	return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_vlan_push_pop(struct __sk_buff *ctx)
+{
+	int err;
+
+	/* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only
+	 * secondary tag push triggers an actual MAC header modification.
+	 */
+	err = bpf_skb_vlan_push(ctx, 0, 42);
+	if (err)
+		goto out;
+	err = bpf_skb_vlan_push(ctx, 0, 207);
+	if (err)
+		goto out;
+
+	if (!check_skb_metadata(ctx))
+		goto out;
+
+	err = bpf_skb_vlan_pop(ctx);
+	if (err)
+		goto out;
+	err = bpf_skb_vlan_pop(ctx);
+	if (err)
+		goto out;
+
+	if (!check_skb_metadata(ctx))
+		goto out;
+
+	test_pass = true;
+out:
+	return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_adjust_room(struct __sk_buff *ctx)
+{
+	int err;
+
+	/* Grow a 1 byte hole after the MAC header */
+	err = bpf_skb_adjust_room(ctx, 1, BPF_ADJ_ROOM_MAC, 0);
+	if (err)
+		goto out;
+
+	if (!check_skb_metadata(ctx))
+		goto out;
+
+	/* Shrink a 1 byte hole after the MAC header */
+	err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0);
+	if (err)
+		goto out;
+
+	if (!check_skb_metadata(ctx))
+		goto out;
+
+	/* Grow a 256 byte hole to trigger head reallocation */
+	err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0);
+	if (err)
+		goto out;
+
+	if (!check_skb_metadata(ctx))
+		goto out;
+
+	test_pass = true;
+out:
+	return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_change_head_tail(struct __sk_buff *ctx)
+{
+	int err;
+
+	/* Reserve 1 extra byte in the front for packet data */
+	err = bpf_skb_change_head(ctx, 1, 0);
+	if (err)
+		goto out;
+
+	if (!check_skb_metadata(ctx))
+		goto out;
+
+	/* Reserve 256 extra bytes in the front to trigger head reallocation */
+	err = bpf_skb_change_head(ctx, 256, 0);
+	if (err)
+		goto out;
+
+	if (!check_skb_metadata(ctx))
+		goto out;
+
+	/* Reserve 4k extra bytes in the back to trigger head reallocation */
+	err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0);
+	if (err)
+		goto out;
+
+	if (!check_skb_metadata(ctx))
+		goto out;
+
+	test_pass = true;
+out:
+	return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_change_proto(struct __sk_buff *ctx)
+{
+	int err;
+
+	err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0);
+	if (err)
+		goto out;
+
+	if (!check_skb_metadata(ctx))
+		goto out;
+
+	err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0);
+	if (err)
+		goto out;
+
+	if (!check_skb_metadata(ctx))
 		goto out;
 
 	test_pass = true;
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 3d5f30c29ae3..2898b3749d07 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -42,12 +42,14 @@ int bench_trigger_uprobe_multi(void *ctx)
 const volatile int batch_iters = 0;
 
 SEC("?raw_tp")
-int trigger_count(void *ctx)
+int trigger_kernel_count(void *ctx)
 {
 	int i;
 
-	for (i = 0; i < batch_iters; i++)
+	for (i = 0; i < batch_iters; i++) {
 		inc_counter();
+		bpf_get_numa_node_id();
+	}
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
new file mode 100644
index 000000000000..7efa9521105e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Timer tests */
+
+struct timer_elem {
+	struct bpf_timer t;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct timer_elem);
+} timer_map SEC(".maps");
+
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+	u32 data;
+	/* Timer callbacks are never sleepable, even from sleepable programs */
+	bpf_copy_from_user(&data, sizeof(data), NULL);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__failure __msg("helper call might sleep in a non-sleepable prog")
+int timer_non_sleepable_prog(void *ctx)
+{
+	struct timer_elem *val;
+	int key = 0;
+
+	val = bpf_map_lookup_elem(&timer_map, &key);
+	if (!val)
+		return 0;
+
+	bpf_timer_init(&val->t, &timer_map, 0);
+	bpf_timer_set_callback(&val->t, timer_cb);
+	return 0;
+}
+
+SEC("lsm.s/file_open")
+__failure __msg("helper call might sleep in a non-sleepable prog")
+int timer_sleepable_prog(void *ctx)
+{
+	struct timer_elem *val;
+	int key = 0;
+
+	val = bpf_map_lookup_elem(&timer_map, &key);
+	if (!val)
+		return 0;
+
+	bpf_timer_init(&val->t, &timer_map, 0);
+	bpf_timer_set_callback(&val->t, timer_cb);
+	return 0;
+}
+
+/* Workqueue tests */
+
+struct wq_elem {
+	struct bpf_wq w;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct wq_elem);
+} wq_map SEC(".maps");
+
+static int wq_cb(void *map, int *key, void *value)
+{
+	u32 data;
+	/* Workqueue callbacks are always sleepable, even from non-sleepable programs */
+	bpf_copy_from_user(&data, sizeof(data), NULL);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__success
+int wq_non_sleepable_prog(void *ctx)
+{
+	struct wq_elem *val;
+	int key = 0;
+
+	val = bpf_map_lookup_elem(&wq_map, &key);
+	if (!val)
+		return 0;
+
+	if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
+		return 0;
+	if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+		return 0;
+	return 0;
+}
+
+SEC("lsm.s/file_open")
+__success
+int wq_sleepable_prog(void *ctx)
+{
+	struct wq_elem *val;
+	int key = 0;
+
+	val = bpf_map_lookup_elem(&wq_map, &key);
+	if (!val)
+		return 0;
+
+	if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
+		return 0;
+	if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+		return 0;
+	return 0;
+}
+
+/* Task work tests */
+
+struct task_work_elem {
+	struct bpf_task_work tw;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct task_work_elem);
+} task_work_map SEC(".maps");
+
+static int task_work_cb(struct bpf_map *map, void *key, void *value)
+{
+	u32 data;
+	/* Task work callbacks are always sleepable, even from non-sleepable programs */
+	bpf_copy_from_user(&data, sizeof(data), NULL);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__success
+int task_work_non_sleepable_prog(void *ctx)
+{
+	struct task_work_elem *val;
+	struct task_struct *task;
+	int key = 0;
+
+	val = bpf_map_lookup_elem(&task_work_map, &key);
+	if (!val)
+		return 0;
+
+	task = bpf_get_current_task_btf();
+	if (!task)
+		return 0;
+
+	bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+	return 0;
+}
+
+SEC("lsm.s/file_open")
+__success
+int task_work_sleepable_prog(void *ctx)
+{
+	struct task_work_elem *val;
+	struct task_struct *task;
+	int key = 0;
+
+	val = bpf_map_lookup_elem(&task_work_map, &key);
+	if (!val)
+		return 0;
+
+	task = bpf_get_current_task_btf();
+	if (!task)
+		return 0;
+
+	bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 0a72e0228ea9..411a18437d7e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -1709,4 +1709,158 @@ __naked void jeq_disagreeing_tnums(void *ctx)
 	: __clobber_all);
 }
 
+SEC("socket")
+__description("conditional jump on same register, branch taken")
+__not_msg("20: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void condition_jump_on_same_register(void *ctx)
+{
+	asm volatile("			\
+	call %[bpf_get_prandom_u32];	\
+	w8 = 0x80000000;		\
+	r0 &= r8;			\
+	if r0 == r0 goto +1;		\
+	goto l1_%=;			\
+	if r0 >= r0 goto +1;		\
+	goto l1_%=;			\
+	if r0 s>= r0 goto +1;		\
+	goto l1_%=;			\
+	if r0 <= r0 goto +1;		\
+	goto l1_%=;			\
+	if r0 s<= r0 goto +1;		\
+	goto l1_%=;			\
+	if r0 != r0 goto l1_%=;		\
+	if r0 >  r0 goto l1_%=;		\
+	if r0 s> r0 goto l1_%=;		\
+	if r0 <  r0 goto l1_%=;		\
+	if r0 s< r0 goto l1_%=;		\
+l0_%=:	r0 = 0;				\
+	exit;				\
+l1_%=:	r0 = 1;				\
+	exit;				\
+"	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, constant value branch taken")
+__not_msg("7: (b7) r0 = 1 {{.*}} R0=1") /* insn 7 is the "r0 = 1" at l1: must never be verified */
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_1(void *ctx)
+{
+	asm volatile("			\
+	r0 = 0;				\
+	if r0 & r0 goto l1_%=;		\
+	r0 = 1;				\
+	if r0 & r0 goto +1;		\
+	goto l1_%=;			\
+l0_%=:	r0 = 0;				\
+	exit;				\
+l1_%=:	r0 = 1;				\
+	exit;				\
+"	:
+	: /* no inputs: this program only uses constants, no helper call */
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value branch taken")
+__not_msg("12: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_2(void *ctx)
+{
+	asm volatile("			\
+	/* range [1;2] */		\
+	call %[bpf_get_prandom_u32];	\
+	r0 &= 0x1;			\
+	r0 += 1;			\
+	if r0 & r0 goto +1;		\
+	goto l1_%=;			\
+	/* range [-2;-1] */		\
+	call %[bpf_get_prandom_u32];	\
+	r0 &= 0x1;			\
+	r0 -= 2;			\
+	if r0 & r0 goto +1;		\
+	goto l1_%=;			\
+l0_%=:	r0 = 0;				\
+	exit;				\
+l1_%=:	r0 = 1;				\
+	exit;				\
+"	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 1")
+__msg("3: (b7) r0 = 0 {{.*}} R0=0")
+__msg("5: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_3(void *ctx)
+{
+	asm volatile("			\
+	/* range [0;1] */		\
+	call %[bpf_get_prandom_u32];	\
+	r0 &= 0x1;			\
+	if r0 & r0 goto l1_%=;		\
+l0_%=:	r0 = 0;				\
+	exit;				\
+l1_%=:	r0 = 1;				\
+	exit;				\
+"	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 2")
+__msg("4: (b7) r0 = 0 {{.*}} R0=0")
+__msg("6: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_4(void *ctx)
+{
+	asm volatile("			\
+	/* range [-1;0] */		\
+	call %[bpf_get_prandom_u32];	\
+	r0 &= 0x1;			\
+	r0 -= 1;			\
+	if r0 & r0 goto l1_%=;		\
+l0_%=:	r0 = 0;				\
+	exit;				\
+l1_%=:	r0 = 1;				\
+	exit;				\
+"	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 3")
+__msg("4: (b7) r0 = 0 {{.*}} R0=0")
+__msg("6: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_5(void *ctx)
+{
+	asm volatile("			\
+	/* range [-1;1] */		\
+	call %[bpf_get_prandom_u32];	\
+	r0 &= 0x2;			\
+	r0 -= 1;			\
+	if r0 & r0 goto l1_%=;		\
+l0_%=:	r0 = 0;				\
+	exit;				\
+l1_%=:	r0 = 1;				\
+	exit;				\
+"	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
index 28b602ac9cbe..911caa8fd1b7 100644
--- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Converted from tools/testing/selftests/bpf/verifier/direct_packet_access.c */
 
+#include <linux/if_ether.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include "bpf_misc.h"
@@ -800,4 +801,62 @@ l0_%=:	/* exit(0) */					\
 	: __clobber_all);
 }
 
+#define access_test_non_linear(name, type, desc, retval, linear_sz, off)			\
+	SEC(type)										\
+	__description("direct packet access: " #name " (non-linear, " type ", " desc ")")	\
+	__success __retval(retval)								\
+	__linear_size(linear_sz)								\
+	__naked void access_non_linear_##name(void)						\
+	{											\
+		asm volatile ("									\
+		r2 = *(u32*)(r1 + %[skb_data]);							\
+		r3 = *(u32*)(r1 + %[skb_data_end]);						\
+		r0 = r2;									\
+		r0 += %[offset];								\
+		if r0 > r3 goto l0_%=;								\
+		r0 = *(u8*)(r0 - 1);								\
+		r0 = 0;										\
+		exit;										\
+	l0_%=:	r0 = 1;										\
+		exit;										\
+	"	:										\
+		: __imm_const(skb_data, offsetof(struct __sk_buff, data)),			\
+		  __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end)),		\
+		  __imm_const(offset, off)							\
+		: __clobber_all);								\
+	}
+
+access_test_non_linear(test31, "tc", "too short eth", 1, ETH_HLEN, 22);
+access_test_non_linear(test32, "tc", "too short 1", 1, 1, 22);
+access_test_non_linear(test33, "tc", "long enough", 0, 22, 22);
+access_test_non_linear(test34, "cgroup_skb/ingress", "too short eth", 1, ETH_HLEN, 8);
+access_test_non_linear(test35, "cgroup_skb/ingress", "too short 1", 1, 1, 8);
+access_test_non_linear(test36, "cgroup_skb/ingress", "long enough", 0, 22, 8);
+
+SEC("tc")
+__description("direct packet access: test37 (non-linear, linearized)")
+__success __retval(0)
+__linear_size(ETH_HLEN)
+__naked void access_non_linear_linearized(void)
+{
+	asm volatile ("				\
+	r6 = r1;				\
+	r2 = 22;				\
+	call %[bpf_skb_pull_data];		\
+	r2 = *(u32*)(r6 + %[skb_data]);		\
+	r3 = *(u32*)(r6 + %[skb_data_end]);	\
+	r0 = r2;				\
+	r0 += 22;				\
+	if r0 > r3 goto l0_%=;			\
+	r0 = *(u8*)(r0 - 1);			\
+	exit;					\
+l0_%=:	r0 = 1;					\
+	exit;					\
+"	:
+	: __imm(bpf_skb_pull_data),
+	  __imm_const(skb_data, offsetof(struct __sk_buff, data)),
+	  __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end))
+	: __clobber_all);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
index 6630a92b1b47..1204fbc58178 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -225,7 +225,7 @@ int trusted_to_untrusted(void *ctx)
 }
 
 char mem[16];
-u32 off;
+u32 offset;
 
 SEC("tp_btf/sys_enter")
 __success
@@ -240,9 +240,9 @@ int anything_to_untrusted(void *ctx)
 	/* scalar to untrusted */
 	subprog_untrusted(0);
 	/* variable offset to untrusted (map) */
-	subprog_untrusted((void *)mem + off);
+	subprog_untrusted((void *)mem + offset);
 	/* variable offset to untrusted (trusted) */
-	subprog_untrusted((void *)bpf_get_current_task_btf() + off);
+	subprog_untrusted((void *)bpf_get_current_task_btf() + offset);
 	return 0;
 }
 
@@ -298,12 +298,12 @@ int anything_to_untrusted_mem(void *ctx)
 	/* scalar to untrusted mem */
 	subprog_void_untrusted(0);
 	/* variable offset to untrusted mem (map) */
-	subprog_void_untrusted((void *)mem + off);
+	subprog_void_untrusted((void *)mem + offset);
 	/* variable offset to untrusted mem (trusted) */
-	subprog_void_untrusted(bpf_get_current_task_btf() + off);
+	subprog_void_untrusted(bpf_get_current_task_btf() + offset);
 	/* variable offset to untrusted char/enum (map) */
-	subprog_char_untrusted(mem + off);
-	subprog_enum_untrusted((void *)mem + off);
+	subprog_char_untrusted(mem + offset);
+	subprog_enum_untrusted((void *)mem + offset);
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotox.c b/tools/testing/selftests/bpf/progs/verifier_gotox.c
new file mode 100644
index 000000000000..607dad058ca1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_gotox.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Isovalent */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../../../include/linux/filter.h"
+
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+
+#define DEFINE_SIMPLE_JUMP_TABLE_PROG(NAME, SRC_REG, OFF, IMM, OUTCOME)	\
+									\
+	SEC("socket")							\
+	OUTCOME								\
+	__naked void jump_table_ ## NAME(void)				\
+	{								\
+		asm volatile ("						\
+		.pushsection .jumptables,\"\",@progbits;		\
+	jt0_%=:								\
+		.quad ret0_%= - socket;					\
+		.quad ret1_%= - socket;					\
+		.size jt0_%=, 16;					\
+		.global jt0_%=;						\
+		.popsection;						\
+									\
+		r0 = jt0_%= ll;						\
+		r0 += 8;						\
+		r0 = *(u64 *)(r0 + 0);					\
+		.8byte %[gotox_r0];					\
+		ret0_%=:						\
+		r0 = 0;							\
+		exit;							\
+		ret1_%=:						\
+		r0 = 1;							\
+		exit;							\
+	"	:							\
+		: __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, (SRC_REG), (OFF) , (IMM))) \
+		: __clobber_all);					\
+	}
+
+/*
+ * The first program which doesn't use reserved fields
+ * loads and works properly. The rest fail to load.
+ */
+DEFINE_SIMPLE_JUMP_TABLE_PROG(ok,                          BPF_REG_0, 0, 0, __success __retval(1))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_src_reg,      BPF_REG_1, 0, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_off, BPF_REG_0, 1, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_imm, BPF_REG_0, 0, 1, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+
+/*
+ * Gotox is forbidden when there is no jump table loaded
+ * which points to the sub-function where the gotox is used
+ */
+SEC("socket")
+__failure __msg("no jump tables found for subprog starting at 0")
+__naked void jump_table_no_jump_table(void)
+{
+	asm volatile ("						\
+	.8byte %[gotox_r0];					\
+	r0 = 1;							\
+	exit;							\
+"	:							\
+	: __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+	: __clobber_all);
+}
+
+/*
+ * Incorrect type of the target register, only PTR_TO_INSN allowed
+ */
+SEC("socket")
+__failure __msg("R1 has type scalar, expected PTR_TO_INSN")
+__naked void jump_table_incorrect_dst_reg_type(void)
+{
+	asm volatile ("						\
+	.pushsection .jumptables,\"\",@progbits;		\
+jt0_%=:								\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.size jt0_%=, 16;					\
+	.global jt0_%=;						\
+	.popsection;						\
+								\
+	r0 = jt0_%= ll;						\
+	r0 += 8;						\
+	r0 = *(u64 *)(r0 + 0);					\
+	r1 = 42;						\
+	.8byte %[gotox_r1];					\
+	ret0_%=:						\
+	r0 = 0;							\
+	exit;							\
+	ret1_%=:						\
+	r0 = 1;							\
+	exit;							\
+"	:							\
+	: __imm_insn(gotox_r1, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_1, 0, 0 , 0))
+	: __clobber_all);
+}
+
+#define DEFINE_INVALID_SIZE_PROG(READ_SIZE, OUTCOME)			\
+									\
+	SEC("socket")							\
+	OUTCOME								\
+	__naked void jump_table_invalid_read_size_ ## READ_SIZE(void)	\
+	{								\
+		asm volatile ("						\
+		.pushsection .jumptables,\"\",@progbits;		\
+	jt0_%=:								\
+		.quad ret0_%= - socket;					\
+		.quad ret1_%= - socket;					\
+		.size jt0_%=, 16;					\
+		.global jt0_%=;						\
+		.popsection;						\
+									\
+		r0 = jt0_%= ll;						\
+		r0 += 8;						\
+		r0 = *(" #READ_SIZE " *)(r0 + 0);			\
+		.8byte %[gotox_r0];					\
+		ret0_%=:						\
+		r0 = 0;							\
+		exit;							\
+		ret1_%=:						\
+		r0 = 1;							\
+		exit;							\
+	"	:							\
+		: __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) \
+		: __clobber_all);					\
+	}
+
+DEFINE_INVALID_SIZE_PROG(u32, __failure __msg("Invalid read of 4 bytes from insn_array"))
+DEFINE_INVALID_SIZE_PROG(u16, __failure __msg("Invalid read of 2 bytes from insn_array"))
+DEFINE_INVALID_SIZE_PROG(u8,  __failure __msg("Invalid read of 1 bytes from insn_array"))
+
+SEC("socket")
+__failure __msg("misaligned value access off 0+1+0 size 8")
+__naked void jump_table_misaligned_access(void)
+{
+	asm volatile ("						\
+	.pushsection .jumptables,\"\",@progbits;		\
+jt0_%=:								\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.size jt0_%=, 16;					\
+	.global jt0_%=;						\
+	.popsection;						\
+								\
+	r0 = jt0_%= ll;						\
+	r0 += 1;						\
+	r0 = *(u64 *)(r0 + 0);					\
+	.8byte %[gotox_r0];					\
+	ret0_%=:						\
+	r0 = 0;							\
+	exit;							\
+	ret1_%=:						\
+	r0 = 1;							\
+	exit;							\
+"	:							\
+	: __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+	: __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("invalid access to map value, value_size=16 off=24 size=8")
+__naked void jump_table_invalid_mem_acceess_pos(void)
+{
+	asm volatile ("						\
+	.pushsection .jumptables,\"\",@progbits;		\
+jt0_%=:								\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.size jt0_%=, 16;					\
+	.global jt0_%=;						\
+	.popsection;						\
+								\
+	r0 = jt0_%= ll;						\
+	r0 += 24;						\
+	r0 = *(u64 *)(r0 + 0);					\
+	.8byte %[gotox_r0];					\
+	ret0_%=:						\
+	r0 = 0;							\
+	exit;							\
+	ret1_%=:						\
+	r0 = 1;							\
+	exit;							\
+"	:							\
+	: __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+	: __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("invalid access to map value, value_size=16 off=-24 size=8")
+__naked void jump_table_invalid_mem_acceess_neg(void)
+{
+	asm volatile ("						\
+	.pushsection .jumptables,\"\",@progbits;		\
+jt0_%=:								\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.size jt0_%=, 16;					\
+	.global jt0_%=;						\
+	.popsection;						\
+								\
+	r0 = jt0_%= ll;						\
+	r0 -= 24;						\
+	r0 = *(u64 *)(r0 + 0);					\
+	.8byte %[gotox_r0];					\
+	ret0_%=:						\
+	r0 = 0;							\
+	exit;							\
+	ret1_%=:						\
+	r0 = 1;							\
+	exit;							\
+"	:							\
+	: __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+	: __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void jump_table_add_sub_ok(void)
+{
+	asm volatile ("						\
+	.pushsection .jumptables,\"\",@progbits;		\
+jt0_%=:								\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.size jt0_%=, 16;					\
+	.global jt0_%=;						\
+	.popsection;						\
+								\
+	r0 = jt0_%= ll;						\
+	r0 -= 24;						\
+	r0 += 32;						\
+	r0 = *(u64 *)(r0 + 0);					\
+	.8byte %[gotox_r0];					\
+	ret0_%=:						\
+	r0 = 0;							\
+	exit;							\
+	ret1_%=:						\
+	r0 = 1;							\
+	exit;							\
+"	:							\
+	: __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+	: __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("write into map forbidden, value_size=16 off=8 size=8")
+__naked void jump_table_no_writes(void)
+{
+	asm volatile ("						\
+	.pushsection .jumptables,\"\",@progbits;		\
+jt0_%=:								\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.size jt0_%=, 16;					\
+	.global jt0_%=;						\
+	.popsection;						\
+								\
+	r0 = jt0_%= ll;						\
+	r0 += 8;						\
+	r1 = 0xbeef;						\
+	*(u64 *)(r0 + 0) = r1;					\
+	.8byte %[gotox_r0];					\
+	ret0_%=:						\
+	r0 = 0;							\
+	exit;							\
+	ret1_%=:						\
+	r0 = 1;							\
+	exit;							\
+"	:							\
+	: __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+	: __clobber_all);
+}
+
+#define DEFINE_JUMP_TABLE_USE_REG(REG)					\
+	SEC("socket")							\
+	__success __retval(1)						\
+	__naked void jump_table_use_reg_r ## REG(void)			\
+	{								\
+		asm volatile ("						\
+		.pushsection .jumptables,\"\",@progbits;		\
+	jt0_%=:								\
+		.quad ret0_%= - socket;					\
+		.quad ret1_%= - socket;					\
+		.size jt0_%=, 16;					\
+		.global jt0_%=;						\
+		.popsection;						\
+									\
+		r0 = jt0_%= ll;						\
+		r0 += 8;						\
+		r" #REG " = *(u64 *)(r0 + 0);				\
+		.8byte %[gotox_rX];					\
+		ret0_%=:						\
+		r0 = 0;							\
+		exit;							\
+		ret1_%=:						\
+		r0 = 1;							\
+		exit;							\
+	"	:							\
+		: __imm_insn(gotox_rX, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_ ## REG, 0, 0 , 0)) \
+		: __clobber_all);					\
+	}
+
+DEFINE_JUMP_TABLE_USE_REG(0)
+DEFINE_JUMP_TABLE_USE_REG(1)
+DEFINE_JUMP_TABLE_USE_REG(2)
+DEFINE_JUMP_TABLE_USE_REG(3)
+DEFINE_JUMP_TABLE_USE_REG(4)
+DEFINE_JUMP_TABLE_USE_REG(5)
+DEFINE_JUMP_TABLE_USE_REG(6)
+DEFINE_JUMP_TABLE_USE_REG(7)
+DEFINE_JUMP_TABLE_USE_REG(8)
+DEFINE_JUMP_TABLE_USE_REG(9)
+
+__used static int test_subprog(void)
+{
+	return 0;
+}
+
+SEC("socket")
+__failure __msg("jump table for insn 4 points outside of the subprog [0,10]")
+__naked void jump_table_outside_subprog(void)
+{
+	asm volatile ("						\
+	.pushsection .jumptables,\"\",@progbits;		\
+jt0_%=:								\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.quad ret_out_%= - socket;				\
+	.size jt0_%=, 24;					\
+	.global jt0_%=;						\
+	.popsection;						\
+								\
+	r0 = jt0_%= ll;						\
+	r0 += 8;						\
+	r0 = *(u64 *)(r0 + 0);					\
+	.8byte %[gotox_r0];					\
+	ret0_%=:						\
+	r0 = 0;							\
+	exit;							\
+	ret1_%=:						\
+	r0 = 1;							\
+	call test_subprog;					\
+	exit;							\
+	ret_out_%=:						\
+"	:							\
+	: __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+	: __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void jump_table_contains_non_unique_values(void)
+{
+	asm volatile ("						\
+	.pushsection .jumptables,\"\",@progbits;		\
+jt0_%=:								\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.quad ret0_%= - socket;					\
+	.quad ret1_%= - socket;					\
+	.size jt0_%=, 80;					\
+	.global jt0_%=;						\
+	.popsection;						\
+								\
+	r0 = jt0_%= ll;						\
+	r0 += 8;						\
+	r0 = *(u64 *)(r0 + 0);					\
+	.8byte %[gotox_r0];					\
+	ret0_%=:						\
+	r0 = 0;							\
+	exit;							\
+	ret1_%=:						\
+	r0 = 1;							\
+	exit;							\
+"	:							\
+	: __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+	: __clobber_all);
+}
+
+#endif /* __TARGET_ARCH_x86 || __TARGET_ARCH_arm64 */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
index c0e808509268..2de105057bbc 100644
--- a/tools/testing/selftests/bpf/progs/verifier_live_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
@@ -292,3 +292,53 @@ __naked void syzbot_postorder_bug1(void)
 	"exit;"
 	::: __clobber_all);
 }
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} map_array SEC(".maps"); /* prog array passed to bpf_tail_call in write_tail_call() */
+
+SEC("socket")
+__failure __msg("invalid read from stack R2 off=-1024 size=8")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked unsigned long caller_stack_write_tail_call(void)
+{
+	asm volatile (
+	"r6 = r1;"				/* keep ctx across the helper call */
+	"*(u64 *)(r10 - 8) = -8;"		/* fp[-8] = -8 */
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 42 goto 1f;"			/* r0 is a helper result, not a known constant */
+	"goto 2f;"
+"1:"
+	"*(u64 *)(r10 - 8) = -1024;"		/* fp[-8] = -1024, the offset named in __msg */
+"2:"
+	"r1 = r6;"				/* arg1 = ctx */
+	"r2 = r10;"
+	"r2 += -8;"				/* arg2 = &fp[-8] */
+	"call write_tail_call;"			/* stores -16 via arg2, but only after its tail call */
+	"r1 = *(u64 *)(r10 - 8);"		/* reload the offset from fp[-8] */
+	"r2 = r10;"
+	"r2 += r1;"				/* r2 = fp + reloaded offset */
+	"r0 = *(u64 *)(r2 + 0);"		/* 8-byte stack read at that offset */
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+static __used __naked unsigned long write_tail_call(void)
+{
+	asm volatile (
+	"r6 = r2;"			/* save the pointer received in r2 */
+	"r2 = %[map_array] ll;"		/* r1 is left untouched for the tail call */
+	"r3 = 0;"			/* index 0 of map_array */
+	"call %[bpf_tail_call];"
+	"*(u64 *)(r6 + 0) = -16;"	/* store through the saved pointer, after the tail call */
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_tail_call),
+	  __imm_addr(map_array)
+	: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_lsm.c b/tools/testing/selftests/bpf/progs/verifier_lsm.c
index 32e5e779cb96..6af9100a37ff 100644
--- a/tools/testing/selftests/bpf/progs/verifier_lsm.c
+++ b/tools/testing/selftests/bpf/progs/verifier_lsm.c
@@ -4,7 +4,7 @@
 #include <bpf/bpf_helpers.h>
 #include "bpf_misc.h"
 
-SEC("lsm/file_alloc_security")
+SEC("lsm/file_permission")
 __description("lsm bpf prog with -4095~0 retval. test 1")
 __success
 __naked int errno_zero_retval_test1(void *ctx)
@@ -15,7 +15,7 @@ __naked int errno_zero_retval_test1(void *ctx)
 	::: __clobber_all);
 }
 
-SEC("lsm/file_alloc_security")
+SEC("lsm/file_permission")
 __description("lsm bpf prog with -4095~0 retval. test 2")
 __success
 __naked int errno_zero_retval_test2(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
index ab9f9f2620ed..e2cbc5bda65e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
@@ -79,11 +79,6 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx)
 	return NF_ACCEPT;
 }
 
-extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
-                               struct bpf_dynptr *ptr__uninit) __ksym;
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
-                                   void *buffer, uint32_t buffer__sz) __ksym;
-
 SEC("netfilter")
 __description("netfilter test prog with skb and state read access")
 __success __failure_unpriv
diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c
index 2b4610b53382..a2132c72d3b8 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sock.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sock.c
@@ -1117,10 +1117,17 @@ int tail_call(struct __sk_buff *sk)
 	return 0;
 }
 
-/* Tail calls invalidate packet pointers. */
+static __noinline /* static, non-inlined helper whose body is a tail call */
+int static_tail_call(struct __sk_buff *sk)
+{
+	bpf_tail_call_static(sk, &jmp_table, 0); /* slot 0 of jmp_table */
+	return 0; /* reached only when the tail call did not transfer control */
+}
+
+/* Tail calls in sub-programs invalidate packet pointers. */
 SEC("tc")
 __failure __msg("invalid mem access")
-int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk)
+int invalidate_pkt_pointers_by_global_tail_call(struct __sk_buff *sk)
 {
 	int *p = (void *)(long)sk->data;
 
@@ -1131,4 +1138,32 @@ int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk)
 	return TCX_PASS;
 }
 
+/* Tail calls in static sub-programs invalidate packet pointers. */
+SEC("tc")
+__failure __msg("invalid mem access")
+int invalidate_pkt_pointers_by_static_tail_call(struct __sk_buff *sk)
+{
+	int *p = (void *)(long)sk->data; /* packet pointer taken before the call */
+
+	if ((void *)(p + 1) > (void *)(long)sk->data_end) /* bounds-check one int */
+		return TCX_DROP;
+	static_tail_call(sk); /* static subprogram that issues the tail call */
+	*p = 42; /* this is unsafe */
+	return TCX_PASS;
+}
+
+/* Direct tail calls do not invalidate packet pointers. */
+SEC("tc")
+__success
+int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk)
+{
+	int *p = (void *)(long)sk->data; /* packet pointer taken before the call */
+
+	if ((void *)(p + 1) > (void *)(long)sk->data_end) /* bounds-check one int */
+		return TCX_DROP;
+	bpf_tail_call_static(sk, &jmp_table, 0);
+	*p = 42; /* safe: a tail call that succeeds never returns here */
+	return TCX_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index ac3e418c2a96..61886ed554de 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -793,4 +793,57 @@ __naked int stack_slot_aliases_precision(void)
 	);
 }
 
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} map_array SEC(".maps"); /* prog array passed to bpf_tail_call in identity_tail_call() */
+
+__naked __noinline __used
+static unsigned long identity_tail_call(void)
+{
+	/* the simplest identity function involving a tail call */
+	asm volatile (
+		"r6 = r2;"			/* stash the value received in r2 */
+		"r2 = %[map_array] ll;"
+		"r3 = 0;"			/* index 0 of map_array */
+		"call %[bpf_tail_call];"
+		"r0 = r6;"			/* hand the stashed value back as the result */
+		"exit;"
+		:
+		: __imm(bpf_tail_call),
+		  __imm_addr(map_array)
+		: __clobber_all);
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+__msg("13: (85) call bpf_tail_call#12")
+__msg("mark_precise: frame1: last_idx 13 first_idx 0 subseq_idx -1 ")
+__msg("returning from callee:")
+__msg("frame1: R0=scalar() R6=3 R10=fp0")
+__msg("to caller at 4:")
+__msg("R0=scalar() R6=map_value(map=.data.vals,ks=4,vs=16) R10=fp0")
+__msg("6: (0f) r1 += r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
+__msg("mark_precise: frame0: parent state regs=r0 stack=:  R0=Pscalar() R6=map_value(map=.data.vals,ks=4,vs=16) R10=fp0")
+__msg("math between map_value pointer and register with unbounded min value is not allowed")
+__naked int subprog_result_tail_call(void)
+{
+	asm volatile (
+		"r2 = 3;"
+		"call identity_tail_call;"
+		"r0 *= 4;"
+		"r1 = %[vals];"
+		"r1 += r0;"
+		"r0 = *(u32 *)(r1 + 0);"
+		"exit;"
+		:
+		: __imm_ptr(vals)
+		: __clobber_common
+	);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c
index 2f1ba08c293e..25be2cd9d42c 100644
--- a/tools/testing/selftests/bpf/progs/wq.c
+++ b/tools/testing/selftests/bpf/progs/wq.c
@@ -187,3 +187,20 @@ long test_call_lru_sleepable(void *ctx)
 
 	return test_elem_callback(&lru, &key, wq_callback);
 }
+
+SEC("tc") /* NOTE(review): "no_btf" presumably describes how the loader sets up "array" - confirm */
+long test_map_no_btf(void *ctx)
+{
+	struct elem *val;
+	struct bpf_wq *wq;
+	int key = 42;
+
+	val = bpf_map_lookup_elem(&array, &key);
+	if (!val)
+		return -2; /* lookup of key 42 failed */
+
+	wq = &val->w;
+	if (bpf_wq_init(wq, &array, 0) != 0)
+		return -3; /* bpf_wq_init() returned non-zero */
+	return 0; /* bpf_wq_init() returned 0 */
+}
diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c
index 4240211a1900..d06f6d40594a 100644
--- a/tools/testing/selftests/bpf/progs/wq_failures.c
+++ b/tools/testing/selftests/bpf/progs/wq_failures.c
@@ -142,3 +142,26 @@ long test_wrong_wq_pointer_offset(void *ctx)
 
 	return -22;
 }
+
+SEC("tc")
+__log_level(2)
+__failure /* the program is expected to be rejected with the messages below */
+__msg(": (85) call bpf_wq_init#")
+__msg("R1 doesn't have constant offset. bpf_wq has to be at the constant offset")
+long test_bad_wq_off(void *ctx)
+{
+	struct elem *val;
+	struct bpf_wq *wq;
+	int key = 42;
+	u64 unknown;
+
+	val = bpf_map_lookup_elem(&array, &key);
+	if (!val)
+		return -2;
+
+	unknown = bpf_get_prandom_u32(); /* helper result, not a compile-time constant */
+	wq = &val->w + unknown; /* bpf_wq pointer at a variable offset, per the second __msg */
+	if (bpf_wq_init(wq, &array, 0) != 0)
+		return -3;
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index 1453a53ed547..b03a87571592 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -90,10 +90,6 @@ echo -e "... through kbuild\n"
 
 if [ -f ".config" ] ; then
 	make_and_clean tools/bpf
-	## "make tools/bpf" sets $(OUTPUT) to ...tools/bpf/runqslower for
-	## runqslower, but the default (used for the "clean" target) is .output.
-	## Let's make sure we clean runqslower's directory properly.
-	make -C tools/bpf/runqslower OUTPUT=${KDIR_ROOT_DIR}/tools/bpf/runqslower/ clean
 
 	## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed
 	## down from toplevel Makefile to bpftool's Makefile.
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
index 769206fc70e4..7b4ae5e81d32 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
@@ -5,6 +5,7 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/prandom.h>
+#include <linux/ktime.h>
 #include <asm/rqspinlock.h>
 #include <linux/perf_event.h>
 #include <linux/kthread.h>
@@ -22,48 +23,146 @@ static struct perf_event_attr hw_attr = {
 
 static rqspinlock_t lock_a;
 static rqspinlock_t lock_b;
+static rqspinlock_t lock_c;
+
+#define RQSL_SLOW_THRESHOLD_MS 10
+static const unsigned int rqsl_hist_ms[] = {
+	1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+	12, 14, 16, 18, 20, 25, 30, 40, 50, 75,
+	100, 150, 200, 250, 1000,
+};
+#define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms)
+
+enum rqsl_context {
+	RQSL_CTX_NORMAL = 0,
+	RQSL_CTX_NMI,
+	RQSL_CTX_MAX,
+};
+
+struct rqsl_cpu_hist {
+	atomic64_t hist[RQSL_CTX_MAX][RQSL_NR_HIST_BUCKETS];
+	atomic64_t success[RQSL_CTX_MAX];
+	atomic64_t failure[RQSL_CTX_MAX];
+};
+
+static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists);
+
+enum rqsl_mode {
+	RQSL_MODE_AA = 0,
+	RQSL_MODE_ABBA,
+	RQSL_MODE_ABBCCA,
+};
+
+static int test_mode = RQSL_MODE_AA;
+module_param(test_mode, int, 0644);
+MODULE_PARM_DESC(test_mode,
+		 "rqspinlock test mode: 0 = AA, 1 = ABBA, 2 = ABBCCA");
+
+static int normal_delay = 20;
+module_param(normal_delay, int, 0644);
+MODULE_PARM_DESC(normal_delay,
+		 "rqspinlock critical section length for normal context (20ms default)");
+
+static int nmi_delay = 10;
+module_param(nmi_delay, int, 0644);
+MODULE_PARM_DESC(nmi_delay,
+		 "rqspinlock critical section length for NMI context (10ms default)");
 
 static struct perf_event **rqsl_evts;
 static int rqsl_nevts;
 
-static bool test_ab = false;
-module_param(test_ab, bool, 0644);
-MODULE_PARM_DESC(test_ab, "Test ABBA situations instead of AA situations");
-
 static struct task_struct **rqsl_threads;
 static int rqsl_nthreads;
 static atomic_t rqsl_ready_cpus = ATOMIC_INIT(0);
 
 static int pause = 0;
 
-static bool nmi_locks_a(int cpu)
+static const char *rqsl_mode_names[] = {
+	[RQSL_MODE_AA] = "AA",
+	[RQSL_MODE_ABBA] = "ABBA",
+	[RQSL_MODE_ABBCCA] = "ABBCCA",
+};
+
+struct rqsl_lock_pair {
+	rqspinlock_t *worker_lock;
+	rqspinlock_t *nmi_lock;
+};
+
+static struct rqsl_lock_pair rqsl_get_lock_pair(int cpu)
 {
-	return (cpu & 1) && test_ab;
+	int mode = READ_ONCE(test_mode); /* test_mode is a 0644 module parameter */
+
+	switch (mode) {
+	default: /* any other value falls into the AA case */
+	case RQSL_MODE_AA:
+		return (struct rqsl_lock_pair){ &lock_a, &lock_a }; /* { worker_lock, nmi_lock } */
+	case RQSL_MODE_ABBA:
+		if (cpu & 1) /* odd CPUs: worker takes B, NMI takes A */
+			return (struct rqsl_lock_pair){ &lock_b, &lock_a };
+		return (struct rqsl_lock_pair){ &lock_a, &lock_b }; /* even CPUs: the reverse */
+	case RQSL_MODE_ABBCCA:
+		switch (cpu % 3) { /* rotate through the A/B, B/C and C/A pairs */
+		case 0:
+			return (struct rqsl_lock_pair){ &lock_a, &lock_b };
+		case 1:
+			return (struct rqsl_lock_pair){ &lock_b, &lock_c };
+		default:
+			return (struct rqsl_lock_pair){ &lock_c, &lock_a };
+		}
+	}
+}
+
+static u32 rqsl_hist_bucket_idx(u32 delta_ms)
+{
+	int idx = 0;
+
+	/* Stop at the first bucket whose upper bound covers delta_ms. */
+	while (idx < RQSL_NR_HIST_BUCKETS && delta_ms > rqsl_hist_ms[idx])
+		idx++;
+	if (idx == RQSL_NR_HIST_BUCKETS)	/* beyond the last bound: use the last bucket */
+		idx = RQSL_NR_HIST_BUCKETS - 1;
+	return idx;
+}
+
+static void rqsl_record_lock_result(u64 delta_ns, enum rqsl_context ctx, int ret)
+{
+	/* Elapsed time is rounded up to whole milliseconds before bucketing. */
+	u32 elapsed_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC);
+	struct rqsl_cpu_hist *stats = this_cpu_ptr(&rqsl_cpu_hists);
+	atomic64_t *outcome;
+
+	/* Every attempt is added to this CPU's histogram for the given context. */
+	atomic64_inc(&stats->hist[ctx][rqsl_hist_bucket_idx(elapsed_ms)]);
+	/* A zero ret counts as a success, any other value as a failure. */
+	outcome = ret ? &stats->failure[ctx] : &stats->success[ctx];
+	atomic64_inc(outcome);
 }
 
 static int rqspinlock_worker_fn(void *arg)
 {
 	int cpu = smp_processor_id();
 	unsigned long flags;
+	u64 start_ns;
 	int ret;
 
 	if (cpu) {
 		atomic_inc(&rqsl_ready_cpus);
 
 		while (!kthread_should_stop()) {
+			struct rqsl_lock_pair locks = rqsl_get_lock_pair(cpu);
+			rqspinlock_t *worker_lock = locks.worker_lock;
+
 			if (READ_ONCE(pause)) {
 				msleep(1000);
 				continue;
 			}
-			if (nmi_locks_a(cpu))
-				ret = raw_res_spin_lock_irqsave(&lock_b, flags);
-			else
-				ret = raw_res_spin_lock_irqsave(&lock_a, flags);
-			mdelay(20);
-			if (nmi_locks_a(cpu) && !ret)
-				raw_res_spin_unlock_irqrestore(&lock_b, flags);
-			else if (!ret)
-				raw_res_spin_unlock_irqrestore(&lock_a, flags);
+			start_ns = ktime_get_mono_fast_ns();
+			ret = raw_res_spin_lock_irqsave(worker_lock, flags);
+			rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+						RQSL_CTX_NORMAL, ret);
+			mdelay(normal_delay);
+			if (!ret)
+				raw_res_spin_unlock_irqrestore(worker_lock, flags);
 			cpu_relax();
 		}
 		return 0;
@@ -91,24 +190,25 @@ static int rqspinlock_worker_fn(void *arg)
 static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
 		   struct pt_regs *regs)
 {
+	struct rqsl_lock_pair locks;
 	int cpu = smp_processor_id();
 	unsigned long flags;
+	u64 start_ns;
 	int ret;
 
 	if (!cpu || READ_ONCE(pause))
 		return;
 
-	if (nmi_locks_a(cpu))
-		ret = raw_res_spin_lock_irqsave(&lock_a, flags);
-	else
-		ret = raw_res_spin_lock_irqsave(test_ab ? &lock_b : &lock_a, flags);
+	locks = rqsl_get_lock_pair(cpu);
+	start_ns = ktime_get_mono_fast_ns();
+	ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags);
+	rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+				RQSL_CTX_NMI, ret);
 
-	mdelay(10);
+	mdelay(nmi_delay);
 
-	if (nmi_locks_a(cpu) && !ret)
-		raw_res_spin_unlock_irqrestore(&lock_a, flags);
-	else if (!ret)
-		raw_res_spin_unlock_irqrestore(test_ab ? &lock_b : &lock_a, flags);
+	if (!ret)
+		raw_res_spin_unlock_irqrestore(locks.nmi_lock, flags);
 }
 
 static void free_rqsl_threads(void)
@@ -142,13 +242,19 @@ static int bpf_test_rqspinlock_init(void)
 	int i, ret;
 	int ncpus = num_online_cpus();
 
-	pr_err("Mode = %s\n", test_ab ? "ABBA" : "AA");
+	if (test_mode < RQSL_MODE_AA || test_mode > RQSL_MODE_ABBCCA) {
+		pr_err("Invalid mode %d\n", test_mode);
+		return -EINVAL;
+	}
+
+	pr_err("Mode = %s\n", rqsl_mode_names[test_mode]);
 
-	if (ncpus < 3)
+	if (ncpus < test_mode + 2)
 		return -ENOTSUPP;
 
 	raw_res_spin_lock_init(&lock_a);
 	raw_res_spin_lock_init(&lock_b);
+	raw_res_spin_lock_init(&lock_c);
 
 	rqsl_evts = kcalloc(ncpus - 1, sizeof(*rqsl_evts), GFP_KERNEL);
 	if (!rqsl_evts)
@@ -196,10 +302,88 @@ err_perf_events:
 
 module_init(bpf_test_rqspinlock_init);
 
+static void rqsl_print_histograms(void)
+{
+	int cpu, i;
+
+	pr_err("rqspinlock acquisition latency histogram (ms):\n");
+
+	for_each_online_cpu(cpu) {
+		struct rqsl_cpu_hist *hist = per_cpu_ptr(&rqsl_cpu_hists, cpu);
+		u64 norm_counts[RQSL_NR_HIST_BUCKETS];
+		u64 nmi_counts[RQSL_NR_HIST_BUCKETS];
+		u64 total_counts[RQSL_NR_HIST_BUCKETS];
+		u64 norm_success, nmi_success, success_total;
+		u64 norm_failure, nmi_failure, failure_total;
+		u64 norm_total = 0, nmi_total = 0, total = 0;
+		bool has_slow = false;
+
+		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+			norm_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NORMAL][i]);
+			nmi_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NMI][i]);
+			total_counts[i] = norm_counts[i] + nmi_counts[i];
+			norm_total += norm_counts[i];
+			nmi_total += nmi_counts[i];
+			total += total_counts[i];
+			if (rqsl_hist_ms[i] > RQSL_SLOW_THRESHOLD_MS &&
+			    total_counts[i])
+				has_slow = true;
+		}
+
+		norm_success = atomic64_read(&hist->success[RQSL_CTX_NORMAL]);
+		nmi_success = atomic64_read(&hist->success[RQSL_CTX_NMI]);
+		norm_failure = atomic64_read(&hist->failure[RQSL_CTX_NORMAL]);
+		nmi_failure = atomic64_read(&hist->failure[RQSL_CTX_NMI]);
+		success_total = norm_success + nmi_success;
+		failure_total = norm_failure + nmi_failure;
+
+		if (!total)
+			continue;
+
+		if (!has_slow) {
+			pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
+			       "success %llu (normal %llu, nmi %llu) | "
+			       "failure %llu (normal %llu, nmi %llu), all within 0-%ums\n",
+			       cpu, total, norm_total, nmi_total,
+			       success_total, norm_success, nmi_success,
+			       failure_total, norm_failure, nmi_failure,
+			       RQSL_SLOW_THRESHOLD_MS);
+			continue;
+		}
+
+		pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
+		       "success %llu (normal %llu, nmi %llu) | "
+		       "failure %llu (normal %llu, nmi %llu)\n",
+		       cpu, total, norm_total, nmi_total,
+		       success_total, norm_success, nmi_success,
+		       failure_total, norm_failure, nmi_failure);
+		for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+			unsigned int start_ms;
+
+			if (!total_counts[i])
+				continue;
+
+			start_ms = i == 0 ? 0 : rqsl_hist_ms[i - 1] + 1;
+			if (i == RQSL_NR_HIST_BUCKETS - 1) {
+				pr_err("   >= %ums: total %llu (normal %llu, nmi %llu)\n",
+				       start_ms, total_counts[i],
+				       norm_counts[i], nmi_counts[i]);
+			} else {
+				pr_err("   %u-%ums: total %llu (normal %llu, nmi %llu)\n",
+				       start_ms, rqsl_hist_ms[i],
+				       total_counts[i],
+				       norm_counts[i], nmi_counts[i]);
+			}
+		}
+	}
+}
+
 static void bpf_test_rqspinlock_exit(void)
 {
+	WRITE_ONCE(pause, 1);
 	free_rqsl_threads();
 	free_rqsl_evts();
+	rqsl_print_histograms();
 }
 
 module_exit(bpf_test_rqspinlock_exit);
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index 8074bc5f6f20..1669a7eeda26 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -417,6 +417,30 @@ noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d,
 	return a + (long)b + c + d + (long)e + f + g + h + i + j + k;
 }
 
+noinline void bpf_testmod_stacktrace_test(void)
+{
+	/* used for stacktrace test as attach function */
+	asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_3(void)
+{
+	bpf_testmod_stacktrace_test();
+	asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_2(void)
+{
+	bpf_testmod_stacktrace_test_3();
+	asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_1(void)
+{
+	bpf_testmod_stacktrace_test_2();
+	asm volatile ("");
+}
+
 int bpf_testmod_fentry_ok;
 
 noinline ssize_t
@@ -497,6 +521,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
 			21, 22, 23, 24, 25, 26) != 231)
 		goto out;
 
+	bpf_testmod_stacktrace_test_1();
+
 	bpf_testmod_fentry_ok = 1;
 out:
 	return -EIO; /* always fail */
@@ -900,7 +926,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args)
 		goto out;
 	}
 
-	err = kernel_connect(sock, (struct sockaddr *)&args->addr,
+	err = kernel_connect(sock, (struct sockaddr_unsized *)&args->addr,
 			     args->addrlen, 0);
 out:
 	mutex_unlock(&sock_lock);
@@ -923,7 +949,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args)
 		goto out;
 	}
 
-	err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen);
+	err = kernel_bind(sock, (struct sockaddr_unsized *)&args->addr, args->addrlen);
 out:
 	mutex_unlock(&sock_lock);
 
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 74ecc281bb8c..338c035c3688 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -43,6 +43,7 @@
 #define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv="
 #define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout="
 #define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv="
+#define TEST_TAG_LINEAR_SIZE "comment:test_linear_size="
 
 /* Warning: duplicated in bpf_misc.h */
 #define POINTER_VALUE	0xbadcafe
@@ -89,6 +90,7 @@ struct test_spec {
 	int mode_mask;
 	int arch_mask;
 	int load_mask;
+	int linear_sz;
 	bool auxiliary;
 	bool valid;
 };
@@ -633,6 +635,21 @@ static int parse_test_spec(struct test_loader *tester,
 					      &spec->unpriv.stdout);
 			if (err)
 				goto cleanup;
+		} else if (str_has_pfx(s, TEST_TAG_LINEAR_SIZE)) {
+			switch (bpf_program__type(prog)) {
+			case BPF_PROG_TYPE_SCHED_ACT:
+			case BPF_PROG_TYPE_SCHED_CLS:
+			case BPF_PROG_TYPE_CGROUP_SKB:
+				val = s + sizeof(TEST_TAG_LINEAR_SIZE) - 1;
+				err = parse_int(val, &spec->linear_sz, "test linear size");
+				if (err)
+					goto cleanup;
+				break;
+			default:
+				PRINT_FAIL("__linear_size for unsupported program type");
+				err = -EINVAL;
+				goto cleanup;
+			}
 		}
 	}
 
@@ -1007,10 +1024,11 @@ static bool is_unpriv_capable_map(struct bpf_map *map)
 	}
 }
 
-static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
+static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts, int linear_sz)
 {
 	__u8 tmp_out[TEST_DATA_LEN << 2] = {};
 	__u8 tmp_in[TEST_DATA_LEN] = {};
+	struct __sk_buff ctx = {};
 	int err, saved_errno;
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.data_in = tmp_in,
@@ -1020,6 +1038,12 @@ static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
 		.repeat = 1,
 	);
 
+	if (linear_sz) {
+		ctx.data_end = linear_sz;
+		topts.ctx_in = &ctx;
+		topts.ctx_size_in = sizeof(ctx);
+	}
+
 	if (empty_opts) {
 		memset(&topts, 0, sizeof(struct bpf_test_run_opts));
 		topts.sz = sizeof(struct bpf_test_run_opts);
@@ -1269,7 +1293,8 @@ void run_subtest(struct test_loader *tester,
 		}
 
 		err = do_prog_test_run(bpf_program__fd(tprog), &retval,
-				       bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
+				       bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false,
+				       spec->linear_sz);
 		if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) {
 			PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
 			goto tobj_cleanup;
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 3fae9ce46ca9..ccc5acd55ff9 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1399,7 +1399,8 @@ static void test_map_stress(void)
 static bool can_retry(int err)
 {
 	return (err == EAGAIN || err == EBUSY ||
-		(err == ENOMEM && map_opts.map_flags == BPF_F_NO_PREALLOC));
+		((err == ENOMEM || err == E2BIG) &&
+		 map_opts.map_flags == BPF_F_NO_PREALLOC));
 }
 
 int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 5546b05a0486..f1300047c1e0 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -116,7 +116,7 @@ static void tag_from_alg(int insns, uint8_t *tag, uint32_t len)
 	static const struct sockaddr_alg alg = {
 		.salg_family	= AF_ALG,
 		.salg_type	= "hash",
-		.salg_name	= "sha1",
+		.salg_name	= "sha256",
 	};
 	int fd_base, fd_alg, ret;
 	ssize_t size;
diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh
deleted file mode 100755
index 76f0bd17061f..000000000000
--- a/tools/testing/selftests/bpf/test_tc_edt.sh
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# This test installs a TC bpf program that throttles a TCP flow
-# with dst port = 9000 down to 5MBps. Then it measures actual
-# throughput of the flow.
-
-BPF_FILE="test_tc_edt.bpf.o"
-if [[ $EUID -ne 0 ]]; then
-	echo "This script must be run as root"
-	echo "FAIL"
-	exit 1
-fi
-
-# check that nc, dd, and timeout are present
-command -v nc >/dev/null 2>&1 || \
-	{ echo >&2 "nc is not available"; exit 1; }
-command -v dd >/dev/null 2>&1 || \
-	{ echo >&2 "nc is not available"; exit 1; }
-command -v timeout >/dev/null 2>&1 || \
-	{ echo >&2 "timeout is not available"; exit 1; }
-
-readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
-readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
-
-readonly IP_SRC="172.16.1.100"
-readonly IP_DST="172.16.2.100"
-
-cleanup()
-{
-	ip netns del ${NS_SRC}
-	ip netns del ${NS_DST}
-}
-
-trap cleanup EXIT
-
-set -e  # exit on error
-
-ip netns add "${NS_SRC}"
-ip netns add "${NS_DST}"
-ip link add veth_src type veth peer name veth_dst
-ip link set veth_src netns ${NS_SRC}
-ip link set veth_dst netns ${NS_DST}
-
-ip -netns ${NS_SRC} addr add ${IP_SRC}/24  dev veth_src
-ip -netns ${NS_DST} addr add ${IP_DST}/24  dev veth_dst
-
-ip -netns ${NS_SRC} link set dev veth_src up
-ip -netns ${NS_DST} link set dev veth_dst up
-
-ip -netns ${NS_SRC} route add ${IP_DST}/32  dev veth_src
-ip -netns ${NS_DST} route add ${IP_SRC}/32  dev veth_dst
-
-# set up TC on TX
-ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
-ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
-ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
-	bpf da obj ${BPF_FILE} sec cls_test
-
-
-# start the listener
-ip netns exec ${NS_DST} bash -c \
-	"nc -4 -l -p 9000 >/dev/null &"
-declare -i NC_PID=$!
-sleep 1
-
-declare -ir TIMEOUT=20
-declare -ir EXPECTED_BPS=5000000
-
-# run the load, capture RX bytes on DST
-declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \
-	cat /sys/class/net/veth_dst/statistics/rx_bytes )
-
-set +e
-ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \
-	bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
-set -e
-
-declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \
-	cat /sys/class/net/veth_dst/statistics/rx_bytes )
-
-declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT ))
-
-echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \
-	awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n",
-		$1, ($2-$3)*100.0/$3}'
-
-# Pass the test if the actual bps is within 1% of the expected bps.
-# The difference is usually about 0.1% on a 20-sec test, and ==> zero
-# the longer the test runs.
-declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \
-	 awk 'function abs(x){return ((x < 0.0) ? -x : x)}
-	      {if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" }
-		else { print "0"} }' )
-if [ "${RES}" == "0" ] ; then
-	echo "PASS"
-else
-	echo "FAIL"
-	exit 1
-fi
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
deleted file mode 100755
index cb55a908bb0d..000000000000
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ /dev/null
@@ -1,320 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# In-place tunneling
-
-BPF_FILE="test_tc_tunnel.bpf.o"
-# must match the port that the bpf program filters on
-readonly port=8000
-
-readonly ns_prefix="ns-$$-"
-readonly ns1="${ns_prefix}1"
-readonly ns2="${ns_prefix}2"
-
-readonly ns1_v4=192.168.1.1
-readonly ns2_v4=192.168.1.2
-readonly ns1_v6=fd::1
-readonly ns2_v6=fd::2
-
-# Must match port used by bpf program
-readonly udpport=5555
-# MPLSoverUDP
-readonly mplsudpport=6635
-readonly mplsproto=137
-
-readonly infile="$(mktemp)"
-readonly outfile="$(mktemp)"
-
-setup() {
-	ip netns add "${ns1}"
-	ip netns add "${ns2}"
-
-	ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \
-	      peer name veth2 mtu 1500 netns "${ns2}"
-
-	ip netns exec "${ns1}" ethtool -K veth1 tso off
-
-	ip -netns "${ns1}" link set veth1 up
-	ip -netns "${ns2}" link set veth2 up
-
-	ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1
-	ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2
-	ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad
-	ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad
-
-	# clamp route to reserve room for tunnel headers
-	ip -netns "${ns1}" -4 route flush table main
-	ip -netns "${ns1}" -6 route flush table main
-	ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1
-	ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1
-
-	sleep 1
-
-	dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
-}
-
-cleanup() {
-	ip netns del "${ns2}"
-	ip netns del "${ns1}"
-
-	if [[ -f "${outfile}" ]]; then
-		rm "${outfile}"
-	fi
-	if [[ -f "${infile}" ]]; then
-		rm "${infile}"
-	fi
-
-	if [[ -n $server_pid ]]; then
-		kill $server_pid 2> /dev/null
-	fi
-}
-
-server_listen() {
-	ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
-	server_pid=$!
-}
-
-client_connect() {
-	ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}"
-	echo $?
-}
-
-verify_data() {
-	wait "${server_pid}"
-	server_pid=
-	# sha1sum returns two fields [sha1] [filepath]
-	# convert to bash array and access first elem
-	insum=($(sha1sum ${infile}))
-	outsum=($(sha1sum ${outfile}))
-	if [[ "${insum[0]}" != "${outsum[0]}" ]]; then
-		echo "data mismatch"
-		exit 1
-	fi
-}
-
-wait_for_port() {
-	for i in $(seq 20); do
-		if ip netns exec "${ns2}" ss ${2:--4}OHntl | grep -q "$1"; then
-			return 0
-		fi
-		sleep 0.1
-	done
-	return 1
-}
-
-set -e
-
-# no arguments: automated test, run all
-if [[ "$#" -eq "0" ]]; then
-	echo "ipip"
-	$0 ipv4 ipip none 100
-
-	echo "ipip6"
-	$0 ipv4 ipip6 none 100
-
-	echo "ip6ip6"
-	$0 ipv6 ip6tnl none 100
-
-	echo "sit"
-	$0 ipv6 sit none 100
-
-	echo "ip4 vxlan"
-	$0 ipv4 vxlan eth 2000
-
-	echo "ip6 vxlan"
-	$0 ipv6 ip6vxlan eth 2000
-
-	for mac in none mpls eth ; do
-		echo "ip gre $mac"
-		$0 ipv4 gre $mac 100
-
-		echo "ip6 gre $mac"
-		$0 ipv6 ip6gre $mac 100
-
-		echo "ip gre $mac gso"
-		$0 ipv4 gre $mac 2000
-
-		echo "ip6 gre $mac gso"
-		$0 ipv6 ip6gre $mac 2000
-
-		echo "ip udp $mac"
-		$0 ipv4 udp $mac 100
-
-		echo "ip6 udp $mac"
-		$0 ipv6 ip6udp $mac 100
-
-		echo "ip udp $mac gso"
-		$0 ipv4 udp $mac 2000
-
-		echo "ip6 udp $mac gso"
-		$0 ipv6 ip6udp $mac 2000
-	done
-
-	echo "OK. All tests passed"
-	exit 0
-fi
-
-if [[ "$#" -ne "4" ]]; then
-	echo "Usage: $0"
-	echo "   or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>"
-	exit 1
-fi
-
-case "$1" in
-"ipv4")
-	readonly addr1="${ns1_v4}"
-	readonly addr2="${ns2_v4}"
-	readonly ipproto=4
-	readonly netcat_opt=-${ipproto}
-	readonly foumod=fou
-	readonly foutype=ipip
-	readonly fouproto=4
-	readonly fouproto_mpls=${mplsproto}
-	readonly gretaptype=gretap
-	;;
-"ipv6")
-	readonly addr1="${ns1_v6}"
-	readonly addr2="${ns2_v6}"
-	readonly ipproto=6
-	readonly netcat_opt=-${ipproto}
-	readonly foumod=fou6
-	readonly foutype=ip6tnl
-	readonly fouproto="41 -6"
-	readonly fouproto_mpls="${mplsproto} -6"
-	readonly gretaptype=ip6gretap
-	;;
-*)
-	echo "unknown arg: $1"
-	exit 1
-	;;
-esac
-
-readonly tuntype=$2
-readonly mac=$3
-readonly datalen=$4
-
-echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}"
-
-trap cleanup EXIT
-
-setup
-
-# basic communication works
-echo "test basic connectivity"
-server_listen
-wait_for_port ${port} ${netcat_opt}
-client_connect
-verify_data
-
-# clientside, insert bpf program to encap all TCP to port ${port}
-# client can no longer connect
-ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
-ip netns exec "${ns1}" tc filter add dev veth1 egress \
-	bpf direct-action object-file ${BPF_FILE} \
-	section "encap_${tuntype}_${mac}"
-echo "test bpf encap without decap (expect failure)"
-server_listen
-wait_for_port ${port} ${netcat_opt}
-! client_connect
-
-if [[ "$tuntype" =~ "udp" ]]; then
-	# Set up fou tunnel.
-	ttype="${foutype}"
-	targs="encap fou encap-sport auto encap-dport $udpport"
-	# fou may be a module; allow this to fail.
-	modprobe "${foumod}" ||true
-	if [[ "$mac" == "mpls" ]]; then
-		dport=${mplsudpport}
-		dproto=${fouproto_mpls}
-		tmode="mode any ttl 255"
-	else
-		dport=${udpport}
-		dproto=${fouproto}
-	fi
-	ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto}
-	targs="encap fou encap-sport auto encap-dport $dport"
-elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
-	ttype=$gretaptype
-elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
-	ttype="vxlan"
-	targs="id 1 dstport 8472 udp6zerocsumrx"
-elif [[ "$tuntype" == "ipip6" ]]; then
-	ttype="ip6tnl"
-	targs=""
-else
-	ttype=$tuntype
-	targs=""
-fi
-
-# tunnel address family differs from inner for SIT
-if [[ "${tuntype}" == "sit" ]]; then
-	link_addr1="${ns1_v4}"
-	link_addr2="${ns2_v4}"
-elif [[ "${tuntype}" == "ipip6" ]]; then
-	link_addr1="${ns1_v6}"
-	link_addr2="${ns2_v6}"
-else
-	link_addr1="${addr1}"
-	link_addr2="${addr2}"
-fi
-
-# serverside, insert decap module
-# server is still running
-# client can connect again
-ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
-	${tmode} remote "${link_addr1}" local "${link_addr2}" $targs
-
-expect_tun_fail=0
-
-if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
-	# No support for MPLS IPv6 fou tunnel; expect failure.
-	expect_tun_fail=1
-elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
-	# No support for TEB fou tunnel; expect failure.
-	expect_tun_fail=1
-elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then
-	# Share ethernet address between tunnel/veth2 so L2 decap works.
-	ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
-		  awk '/ether/ { print $2 }')
-	ip netns exec "${ns2}" ip link set testtun0 address $ethaddr
-elif [[ "$mac" == "mpls" ]]; then
-	modprobe mpls_iptunnel ||true
-	modprobe mpls_gso ||true
-	ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536
-	ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo
-	ip netns exec "${ns2}" ip link set lo up
-	ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1
-	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0
-fi
-
-# Because packets are decapped by the tunnel they arrive on testtun0 from
-# the IP stack perspective.  Ensure reverse path filtering is disabled
-# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
-# expected veth2 (veth2 is where 192.168.1.2 is configured).
-ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
-# rp needs to be disabled for both all and testtun0 as the rp value is
-# selected as the max of the "all" and device-specific values.
-ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
-ip netns exec "${ns2}" ip link set dev testtun0 up
-if [[ "$expect_tun_fail" == 1 ]]; then
-	# This tunnel mode is not supported, so we expect failure.
-	echo "test bpf encap with tunnel device decap (expect failure)"
-	! client_connect
-else
-	echo "test bpf encap with tunnel device decap"
-	client_connect
-	verify_data
-	server_listen
-	wait_for_port ${port} ${netcat_opt}
-fi
-
-# serverside, use BPF for decap
-ip netns exec "${ns2}" ip link del dev testtun0
-ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
-ip netns exec "${ns2}" tc filter add dev veth2 ingress \
-	bpf direct-action object-file ${BPF_FILE} section decap
-echo "test bpf encap with bpf decap"
-client_connect
-verify_data
-
-echo OK
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 352adc8df2d1..9234a58b0a97 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -74,31 +74,23 @@
 #define _GNU_SOURCE
 #include <assert.h>
 #include <fcntl.h>
-#include <errno.h>
 #include <getopt.h>
 #include <linux/if_link.h>
 #include <linux/if_ether.h>
 #include <linux/mman.h>
 #include <linux/netdev.h>
-#include <linux/bitmap.h>
 #include <linux/ethtool.h>
 #include <arpa/inet.h>
 #include <net/if.h>
 #include <locale.h>
-#include <poll.h>
-#include <pthread.h>
-#include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <libgen.h>
-#include <string.h>
 #include <stddef.h>
 #include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/time.h>
 #include <sys/types.h>
-#include <unistd.h>
 
+#include "prog_tests/test_xsk.h"
 #include "xsk_xdp_progs.skel.h"
 #include "xsk.h"
 #include "xskxceiver.h"
@@ -109,9 +101,6 @@
 
 #include <network_helpers.h>
 
-#define MAX_TX_BUDGET_DEFAULT 32
-
-static bool opt_verbose;
 static bool opt_print_tests;
 static enum test_mode opt_mode = TEST_MODE_ALL;
 static u32 opt_run_test = RUN_ALL_TESTS;
@@ -120,169 +109,12 @@ void test__fail(void) { /* for network_helpers.c */ }
 
 static void __exit_with_error(int error, const char *file, const char *func, int line)
 {
-	ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
-			      strerror(error));
+	ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line,
+			      error, strerror(error));
 	ksft_exit_xfail();
 }
 
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
-#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
-static char *mode_string(struct test_spec *test)
-{
-	switch (test->mode) {
-	case TEST_MODE_SKB:
-		return "SKB";
-	case TEST_MODE_DRV:
-		return "DRV";
-	case TEST_MODE_ZC:
-		return "ZC";
-	default:
-		return "BOGUS";
-	}
-}
-
-static void report_failure(struct test_spec *test)
-{
-	if (test->fail)
-		return;
-
-	ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
-			      test->name);
-	test->fail = true;
-}
-
-/* The payload is a word consisting of a packet sequence number in the upper
- * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's
- * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
- */
-static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
-{
-	u32 *ptr = (u32 *)dest, i;
-
-	start /= sizeof(*ptr);
-	size /= sizeof(*ptr);
-	for (i = 0; i < size; i++)
-		ptr[i] = htonl(pkt_nb << 16 | (i + start));
-}
-
-static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
-{
-	memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
-	memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
-	eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
-}
-
-static bool is_umem_valid(struct ifobject *ifobj)
-{
-	return !!ifobj->umem->umem;
-}
-
-static u32 mode_to_xdp_flags(enum test_mode mode)
-{
-	return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
-}
-
-static u64 umem_size(struct xsk_umem_info *umem)
-{
-	return umem->num_frames * umem->frame_size;
-}
-
-static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
-			      u64 size)
-{
-	struct xsk_umem_config cfg = {
-		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
-		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
-		.frame_size = umem->frame_size,
-		.frame_headroom = umem->frame_headroom,
-		.flags = XSK_UMEM__DEFAULT_FLAGS
-	};
-	int ret;
-
-	if (umem->fill_size)
-		cfg.fill_size = umem->fill_size;
-
-	if (umem->comp_size)
-		cfg.comp_size = umem->comp_size;
-
-	if (umem->unaligned_mode)
-		cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
-
-	ret = xsk_umem__create(&umem->umem, buffer, size,
-			       &umem->fq, &umem->cq, &cfg);
-	if (ret)
-		return ret;
-
-	umem->buffer = buffer;
-	if (ifobj->shared_umem && ifobj->rx_on) {
-		umem->base_addr = umem_size(umem);
-		umem->next_buffer = umem_size(umem);
-	}
-
-	return 0;
-}
-
-static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
-{
-	u64 addr;
-
-	addr = umem->next_buffer;
-	umem->next_buffer += umem->frame_size;
-	if (umem->next_buffer >= umem->base_addr + umem_size(umem))
-		umem->next_buffer = umem->base_addr;
-
-	return addr;
-}
-
-static void umem_reset_alloc(struct xsk_umem_info *umem)
-{
-	umem->next_buffer = 0;
-}
-
-static void enable_busy_poll(struct xsk_socket_info *xsk)
-{
-	int sock_opt;
-
-	sock_opt = 1;
-	if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
-		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
-		exit_with_error(errno);
-
-	sock_opt = 20;
-	if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
-		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
-		exit_with_error(errno);
-
-	sock_opt = xsk->batch_size;
-	if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
-		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
-		exit_with_error(errno);
-}
-
-static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
-				  struct ifobject *ifobject, bool shared)
-{
-	struct xsk_socket_config cfg = {};
-	struct xsk_ring_cons *rxr;
-	struct xsk_ring_prod *txr;
-
-	xsk->umem = umem;
-	cfg.rx_size = xsk->rxqsize;
-	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-	cfg.bind_flags = ifobject->bind_flags;
-	if (shared)
-		cfg.bind_flags |= XDP_SHARED_UMEM;
-	if (ifobject->mtu > MAX_ETH_PKT_SIZE)
-		cfg.bind_flags |= XDP_USE_SG;
-	if (umem->comp_size)
-		cfg.tx_size = umem->comp_size;
-	if (umem->fill_size)
-		cfg.rx_size = umem->fill_size;
-
-	txr = ifobject->tx_on ? &xsk->tx : NULL;
-	rxr = ifobject->rx_on ? &xsk->rx : NULL;
-	return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg);
-}
 
 static bool ifobj_zc_avail(struct ifobject *ifobject)
 {
@@ -314,7 +146,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject)
 	ifobject->bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
 	ifobject->rx_on = true;
 	xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-	ret = __xsk_configure_socket(xsk, umem, ifobject, false);
+	ret = xsk_configure_socket(xsk, umem, ifobject, false);
 	if (!ret)
 		zc_avail = true;
 
@@ -327,25 +159,6 @@ out:
 	return zc_avail;
 }
 
-#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
-static unsigned int get_max_skb_frags(void)
-{
-	unsigned int max_skb_frags = 0;
-	FILE *file;
-
-	file = fopen(MAX_SKB_FRAGS_PATH, "r");
-	if (!file) {
-		ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
-		return 0;
-	}
-
-	if (fscanf(file, "%u", &max_skb_frags) != 1)
-		ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
-
-	fclose(file);
-	return max_skb_frags;
-}
-
 static struct option long_options[] = {
 	{"interface", required_argument, 0, 'i'},
 	{"busy-poll", no_argument, 0, 'b'},
@@ -446,2256 +259,36 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
 	}
 }
 
-static int set_ring_size(struct ifobject *ifobj)
-{
-	int ret;
-	u32 ctr = 0;
-
-	while (ctr++ < SOCK_RECONF_CTR) {
-		ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring);
-		if (!ret)
-			break;
-
-		/* Retry if it fails */
-		if (ctr >= SOCK_RECONF_CTR || errno != EBUSY)
-			return -errno;
-
-		usleep(USLEEP_MAX);
-	}
-
-	return ret;
-}
-
-static int hw_ring_size_reset(struct ifobject *ifobj)
-{
-	ifobj->ring.tx_pending = ifobj->set_ring.default_tx;
-	ifobj->ring.rx_pending = ifobj->set_ring.default_rx;
-	return set_ring_size(ifobj);
-}
-
-static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
-			     struct ifobject *ifobj_rx)
-{
-	u32 i, j;
-
-	for (i = 0; i < MAX_INTERFACES; i++) {
-		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
-
-		ifobj->xsk = &ifobj->xsk_arr[0];
-		ifobj->use_poll = false;
-		ifobj->use_fill_ring = true;
-		ifobj->release_rx = true;
-		ifobj->validation_func = NULL;
-		ifobj->use_metadata = false;
-
-		if (i == 0) {
-			ifobj->rx_on = false;
-			ifobj->tx_on = true;
-		} else {
-			ifobj->rx_on = true;
-			ifobj->tx_on = false;
-		}
-
-		memset(ifobj->umem, 0, sizeof(*ifobj->umem));
-		ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
-		ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
-
-		for (j = 0; j < MAX_SOCKETS; j++) {
-			memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
-			ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-			ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE;
-			if (i == 0)
-				ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
-			else
-				ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default;
-
-			memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN);
-			memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN);
-			ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0);
-			ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1);
-		}
-	}
-
-	if (ifobj_tx->hw_ring_size_supp)
-		hw_ring_size_reset(ifobj_tx);
-
-	test->ifobj_tx = ifobj_tx;
-	test->ifobj_rx = ifobj_rx;
-	test->current_step = 0;
-	test->total_steps = 1;
-	test->nb_sockets = 1;
-	test->fail = false;
-	test->set_ring = false;
-	test->adjust_tail = false;
-	test->adjust_tail_support = false;
-	test->mtu = MAX_ETH_PKT_SIZE;
-	test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
-	test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
-	test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
-	test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk;
-}
-
-static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
-			   struct ifobject *ifobj_rx, enum test_mode mode,
-			   const struct test_spec *test_to_run)
-{
-	struct pkt_stream *tx_pkt_stream;
-	struct pkt_stream *rx_pkt_stream;
-	u32 i;
-
-	tx_pkt_stream = test->tx_pkt_stream_default;
-	rx_pkt_stream = test->rx_pkt_stream_default;
-	memset(test, 0, sizeof(*test));
-	test->tx_pkt_stream_default = tx_pkt_stream;
-	test->rx_pkt_stream_default = rx_pkt_stream;
-
-	for (i = 0; i < MAX_INTERFACES; i++) {
-		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
-
-		ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
-		if (mode == TEST_MODE_ZC)
-			ifobj->bind_flags |= XDP_ZEROCOPY;
-		else
-			ifobj->bind_flags |= XDP_COPY;
-	}
-
-	strncpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
-	test->test_func = test_to_run->test_func;
-	test->mode = mode;
-	__test_spec_init(test, ifobj_tx, ifobj_rx);
-}
-
-static void test_spec_reset(struct test_spec *test)
-{
-	__test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
-}
-
-static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
-				   struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
-				   struct bpf_map *xskmap_tx)
-{
-	test->xdp_prog_rx = xdp_prog_rx;
-	test->xdp_prog_tx = xdp_prog_tx;
-	test->xskmap_rx = xskmap_rx;
-	test->xskmap_tx = xskmap_tx;
-}
-
-static int test_spec_set_mtu(struct test_spec *test, int mtu)
-{
-	int err;
-
-	if (test->ifobj_rx->mtu != mtu) {
-		err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu);
-		if (err)
-			return err;
-		test->ifobj_rx->mtu = mtu;
-	}
-	if (test->ifobj_tx->mtu != mtu) {
-		err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu);
-		if (err)
-			return err;
-		test->ifobj_tx->mtu = mtu;
-	}
-
-	return 0;
-}
-
-static void pkt_stream_reset(struct pkt_stream *pkt_stream)
-{
-	if (pkt_stream) {
-		pkt_stream->current_pkt_nb = 0;
-		pkt_stream->nb_rx_pkts = 0;
-	}
-}
-
-static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
-{
-	if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts)
-		return NULL;
-
-	return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
-}
-
-static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
-{
-	while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
-		(*pkts_sent)++;
-		if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid)
-			return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
-		pkt_stream->current_pkt_nb++;
-	}
-	return NULL;
-}
-
-static void pkt_stream_delete(struct pkt_stream *pkt_stream)
-{
-	free(pkt_stream->pkts);
-	free(pkt_stream);
-}
-
-static void pkt_stream_restore_default(struct test_spec *test)
-{
-	struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream;
-	struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream;
-
-	if (tx_pkt_stream != test->tx_pkt_stream_default) {
-		pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream);
-		test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default;
-	}
-
-	if (rx_pkt_stream != test->rx_pkt_stream_default) {
-		pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
-		test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default;
-	}
-}
-
-static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
-{
-	struct pkt_stream *pkt_stream;
-
-	pkt_stream = calloc(1, sizeof(*pkt_stream));
-	if (!pkt_stream)
-		return NULL;
-
-	pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
-	if (!pkt_stream->pkts) {
-		free(pkt_stream);
-		return NULL;
-	}
-
-	pkt_stream->nb_pkts = nb_pkts;
-	return pkt_stream;
-}
-
-static bool pkt_continues(u32 options)
-{
-	return options & XDP_PKT_CONTD;
-}
-
-static u32 ceil_u32(u32 a, u32 b)
-{
-	return (a + b - 1) / b;
-}
-
-static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
-{
-	u32 nb_frags = 1, next_frag;
-
-	if (!pkt)
-		return 1;
-
-	if (!pkt_stream->verbatim) {
-		if (!pkt->valid || !pkt->len)
-			return 1;
-		return ceil_u32(pkt->len, frame_size);
-	}
-
-	/* Search for the end of the packet in verbatim mode */
-	if (!pkt_continues(pkt->options))
-		return nb_frags;
-
-	next_frag = pkt_stream->current_pkt_nb;
-	pkt++;
-	while (next_frag++ < pkt_stream->nb_pkts) {
-		nb_frags++;
-		if (!pkt_continues(pkt->options) || !pkt->valid)
-			break;
-		pkt++;
-	}
-	return nb_frags;
-}
-
-static bool set_pkt_valid(int offset, u32 len)
-{
-	return len <= MAX_ETH_JUMBO_SIZE;
-}
-
-static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
-{
-	pkt->offset = offset;
-	pkt->len = len;
-	pkt->valid = set_pkt_valid(offset, len);
-}
-
-static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
-{
-	bool prev_pkt_valid = pkt->valid;
-
-	pkt_set(pkt_stream, pkt, offset, len);
-	pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid;
-}
-
-static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
-{
-	return ceil_u32(len, umem->frame_size) * umem->frame_size;
-}
-
-static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
-{
-	struct pkt_stream *pkt_stream;
-	u32 i;
-
-	pkt_stream = __pkt_stream_alloc(nb_pkts);
-	if (!pkt_stream)
-		exit_with_error(ENOMEM);
-
-	pkt_stream->nb_pkts = nb_pkts;
-	pkt_stream->max_pkt_len = pkt_len;
-	for (i = 0; i < nb_pkts; i++) {
-		struct pkt *pkt = &pkt_stream->pkts[i];
-
-		pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len);
-		pkt->pkt_nb = nb_start + i * nb_off;
-	}
-
-	return pkt_stream;
-}
-
-static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
-{
-	return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
-}
-
-static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
-{
-	return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
-}
-
-static void pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len)
-{
-	ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
-}
-
-static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
-{
-	pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len);
-	pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len);
-}
-
-static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
-				      int offset)
-{
-	struct pkt_stream *pkt_stream;
-	u32 i;
-
-	pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
-	for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
-		pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
-
-	ifobj->xsk->pkt_stream = pkt_stream;
-}
-
-static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
-{
-	__pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset);
-	__pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset);
-}
-
-static void pkt_stream_receive_half(struct test_spec *test)
-{
-	struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
-	u32 i;
-
-	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
-							      pkt_stream->pkts[0].len);
-	pkt_stream = test->ifobj_rx->xsk->pkt_stream;
-	for (i = 1; i < pkt_stream->nb_pkts; i += 2)
-		pkt_stream->pkts[i].valid = false;
-
-	pkt_stream->nb_valid_entries /= 2;
-}
-
-static void pkt_stream_even_odd_sequence(struct test_spec *test)
-{
-	struct pkt_stream *pkt_stream;
-	u32 i;
-
-	for (i = 0; i < test->nb_sockets; i++) {
-		pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
-		pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
-						   pkt_stream->pkts[0].len, i, 2);
-		test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
-
-		pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
-		pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
-						   pkt_stream->pkts[0].len, i, 2);
-		test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
-	}
-}
-
-static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
-{
-	if (!pkt->valid)
-		return pkt->offset;
-	return pkt->offset + umem_alloc_buffer(umem);
-}
-
-static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
-{
-	pkt_stream->current_pkt_nb--;
-}
-
-static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
-			 u32 pkt_nb, u32 bytes_written)
-{
-	void *data = xsk_umem__get_data(umem->buffer, addr);
-
-	if (len < MIN_PKT_SIZE)
-		return;
-
-	if (!bytes_written) {
-		gen_eth_hdr(xsk, data);
-
-		len -= PKT_HDR_SIZE;
-		data += PKT_HDR_SIZE;
-	} else {
-		bytes_written -= PKT_HDR_SIZE;
-	}
-
-	write_payload(data, pkt_nb, bytes_written, len);
-}
-
-static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
-						       u32 nb_frames, bool verbatim)
-{
-	u32 i, len = 0, pkt_nb = 0, payload = 0;
-	struct pkt_stream *pkt_stream;
-
-	pkt_stream = __pkt_stream_alloc(nb_frames);
-	if (!pkt_stream)
-		exit_with_error(ENOMEM);
-
-	for (i = 0; i < nb_frames; i++) {
-		struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
-		struct pkt *frame = &frames[i];
-
-		pkt->offset = frame->offset;
-		if (verbatim) {
-			*pkt = *frame;
-			pkt->pkt_nb = payload;
-			if (!frame->valid || !pkt_continues(frame->options))
-				payload++;
-		} else {
-			if (frame->valid)
-				len += frame->len;
-			if (frame->valid && pkt_continues(frame->options))
-				continue;
-
-			pkt->pkt_nb = pkt_nb;
-			pkt->len = len;
-			pkt->valid = frame->valid;
-			pkt->options = 0;
-
-			len = 0;
-		}
-
-		print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
-			      pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
-
-		if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
-			pkt_stream->max_pkt_len = pkt->len;
-
-		if (pkt->valid)
-			pkt_stream->nb_valid_entries++;
-
-		pkt_nb++;
-	}
-
-	pkt_stream->nb_pkts = pkt_nb;
-	pkt_stream->verbatim = verbatim;
-	return pkt_stream;
-}
-
-static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
-{
-	struct pkt_stream *pkt_stream;
-
-	pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
-	test->ifobj_tx->xsk->pkt_stream = pkt_stream;
-
-	pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
-	test->ifobj_rx->xsk->pkt_stream = pkt_stream;
-}
-
-static void pkt_print_data(u32 *data, u32 cnt)
-{
-	u32 i;
-
-	for (i = 0; i < cnt; i++) {
-		u32 seqnum, pkt_nb;
-
-		seqnum = ntohl(*data) & 0xffff;
-		pkt_nb = ntohl(*data) >> 16;
-		ksft_print_msg("%u:%u ", pkt_nb, seqnum);
-		data++;
-	}
-}
-
-static void pkt_dump(void *pkt, u32 len, bool eth_header)
-{
-	struct ethhdr *ethhdr = pkt;
-	u32 i, *data;
-
-	if (eth_header) {
-		/*extract L2 frame */
-		ksft_print_msg("DEBUG>> L2: dst mac: ");
-		for (i = 0; i < ETH_ALEN; i++)
-			ksft_print_msg("%02X", ethhdr->h_dest[i]);
-
-		ksft_print_msg("\nDEBUG>> L2: src mac: ");
-		for (i = 0; i < ETH_ALEN; i++)
-			ksft_print_msg("%02X", ethhdr->h_source[i]);
-
-		data = pkt + PKT_HDR_SIZE;
-	} else {
-		data = pkt;
-	}
-
-	/*extract L5 frame */
-	ksft_print_msg("\nDEBUG>> L5: seqnum: ");
-	pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
-	ksft_print_msg("....");
-	if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
-		ksft_print_msg("\n.... ");
-		pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
-			       PKT_DUMP_NB_TO_PRINT);
-	}
-	ksft_print_msg("\n---------------------------------------\n");
-}
-
-static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
-{
-	u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
-	u32 offset = addr % umem->frame_size, expected_offset;
-	int pkt_offset = pkt->valid ? pkt->offset : 0;
-
-	if (!umem->unaligned_mode)
-		pkt_offset = 0;
-
-	expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
-
-	if (offset == expected_offset)
-		return true;
-
-	ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
-	return false;
-}
-
-static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
-{
-	void *data = xsk_umem__get_data(buffer, addr);
-	struct xdp_info *meta = data - sizeof(struct xdp_info);
-
-	if (meta->count != pkt->pkt_nb) {
-		ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
-			       __func__, pkt->pkt_nb,
-			       (unsigned long long)meta->count);
-		return false;
-	}
-
-	return true;
-}
-
-static bool is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx)
-{
-	struct bpf_map *data_map;
-	int adjust_value = 0;
-	int key = 0;
-	int ret;
-
-	data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
-	if (!data_map || !bpf_map__is_internal(data_map)) {
-		ksft_print_msg("Error: could not find bss section of XDP program\n");
-		exit_with_error(errno);
-	}
-
-	ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value);
-	if (ret) {
-		ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret);
-		exit_with_error(errno);
-	}
-
-	/* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail
-	 * helper is not supported. Skip the adjust_tail test case in this scenario.
-	 */
-	return adjust_value != -EOPNOTSUPP;
-}
-
-static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
-			  u32 bytes_processed)
-{
-	u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
-	void *data = xsk_umem__get_data(umem->buffer, addr);
-
-	addr -= umem->base_addr;
-
-	if (addr >= umem->num_frames * umem->frame_size ||
-	    addr + len > umem->num_frames * umem->frame_size) {
-		ksft_print_msg("Frag invalid addr: %llx len: %u\n",
-			       (unsigned long long)addr, len);
-		return false;
-	}
-	if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
-		ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
-			       (unsigned long long)addr, len);
-		return false;
-	}
-
-	pkt_data = data;
-	if (!bytes_processed) {
-		pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data);
-		len -= PKT_HDR_SIZE;
-	} else {
-		bytes_processed -= PKT_HDR_SIZE;
-	}
-
-	expected_seqnum = bytes_processed / sizeof(*pkt_data);
-	seqnum = ntohl(*pkt_data) & 0xffff;
-	pkt_nb = ntohl(*pkt_data) >> 16;
-
-	if (expected_pkt_nb != pkt_nb) {
-		ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
-			       __func__, expected_pkt_nb, pkt_nb);
-		goto error;
-	}
-	if (expected_seqnum != seqnum) {
-		ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
-			       __func__, expected_seqnum, seqnum);
-		goto error;
-	}
-
-	words_to_end = len / sizeof(*pkt_data) - 1;
-	pkt_data += words_to_end;
-	seqnum = ntohl(*pkt_data) & 0xffff;
-	expected_seqnum += words_to_end;
-	if (expected_seqnum != seqnum) {
-		ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
-			       __func__, expected_seqnum, seqnum);
-		goto error;
-	}
-
-	return true;
-
-error:
-	pkt_dump(data, len, !bytes_processed);
-	return false;
-}
-
-static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
-{
-	if (pkt->len != len) {
-		ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
-			       __func__, pkt->len, len);
-		pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
-		return false;
-	}
-
-	return true;
-}
-
-static u32 load_value(u32 *counter)
-{
-	return __atomic_load_n(counter, __ATOMIC_ACQUIRE);
-}
-
-static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret)
-{
-	u32 max_budget = MAX_TX_BUDGET_DEFAULT;
-	u32 cons, ready_to_send;
-	int delta;
-
-	cons = load_value(xsk->tx.consumer);
-	ready_to_send = load_value(xsk->tx.producer) - cons;
-	*ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
-
-	delta = load_value(xsk->tx.consumer) - cons;
-	/* By default, xsk should consume exact @max_budget descs at one
-	 * send in this case where hitting the max budget limit in while
-	 * loop is triggered in __xsk_generic_xmit(). Please make sure that
-	 * the number of descs to be sent is larger than @max_budget, or
-	 * else the tx.consumer will be updated in xskq_cons_peek_desc()
-	 * in time which hides the issue we try to verify.
-	 */
-	if (ready_to_send > max_budget && delta != max_budget)
-		return false;
-
-	return true;
-}
-
-static int kick_tx(struct xsk_socket_info *xsk)
-{
-	int ret;
-
-	if (xsk->check_consumer) {
-		if (!kick_tx_with_check(xsk, &ret))
-			return TEST_FAILURE;
-	} else {
-		ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
-	}
-	if (ret >= 0)
-		return TEST_PASS;
-	if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
-		usleep(100);
-		return TEST_PASS;
-	}
-	return TEST_FAILURE;
-}
-
-static int kick_rx(struct xsk_socket_info *xsk)
-{
-	int ret;
-
-	ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
-	if (ret < 0)
-		return TEST_FAILURE;
-
-	return TEST_PASS;
-}
-
-static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
-{
-	unsigned int rcvd;
-	u32 idx;
-	int ret;
-
-	if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
-		ret = kick_tx(xsk);
-		if (ret)
-			return TEST_FAILURE;
-	}
-
-	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
-	if (rcvd) {
-		if (rcvd > xsk->outstanding_tx) {
-			u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
-
-			ksft_print_msg("[%s] Too many packets completed\n", __func__);
-			ksft_print_msg("Last completion address: %llx\n",
-				       (unsigned long long)addr);
-			return TEST_FAILURE;
-		}
-
-		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
-		xsk->outstanding_tx -= rcvd;
-	}
-
-	return TEST_PASS;
-}
-
-static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
-{
-	u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
-	u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
-	struct pkt_stream *pkt_stream = xsk->pkt_stream;
-	struct ifobject *ifobj = test->ifobj_rx;
-	struct xsk_umem_info *umem = xsk->umem;
-	struct pollfd fds = { };
-	struct pkt *pkt;
-	u64 first_addr = 0;
-	int ret;
-
-	fds.fd = xsk_socket__fd(xsk->xsk);
-	fds.events = POLLIN;
-
-	ret = kick_rx(xsk);
-	if (ret)
-		return TEST_FAILURE;
-
-	if (ifobj->use_poll) {
-		ret = poll(&fds, 1, POLL_TMOUT);
-		if (ret < 0)
-			return TEST_FAILURE;
-
-		if (!ret) {
-			if (!is_umem_valid(test->ifobj_tx))
-				return TEST_PASS;
-
-			ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
-			return TEST_CONTINUE;
-		}
-
-		if (!(fds.revents & POLLIN))
-			return TEST_CONTINUE;
-	}
-
-	rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx);
-	if (!rcvd)
-		return TEST_CONTINUE;
-
-	if (ifobj->use_fill_ring) {
-		ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
-		while (ret != rcvd) {
-			if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
-				ret = poll(&fds, 1, POLL_TMOUT);
-				if (ret < 0)
-					return TEST_FAILURE;
-			}
-			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
-		}
-	}
-
-	while (frags_processed < rcvd) {
-		const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
-		u64 addr = desc->addr, orig;
-
-		orig = xsk_umem__extract_addr(addr);
-		addr = xsk_umem__add_offset_to_addr(addr);
-
-		if (!nb_frags) {
-			pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
-			if (!pkt) {
-				ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
-					       __func__, addr, desc->len);
-				return TEST_FAILURE;
-			}
-		}
-
-		print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n",
-			      addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid);
-
-		if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
-		    !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
-		    !is_metadata_correct(pkt, umem->buffer, addr)))
-			return TEST_FAILURE;
-
-		if (!nb_frags++)
-			first_addr = addr;
-		frags_processed++;
-		pkt_len += desc->len;
-		if (ifobj->use_fill_ring)
-			*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
-
-		if (pkt_continues(desc->options))
-			continue;
-
-		/* The complete packet has been received */
-		if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
-		    !is_offset_correct(umem, pkt, addr))
-			return TEST_FAILURE;
-
-		pkt_stream->nb_rx_pkts++;
-		nb_frags = 0;
-		pkt_len = 0;
-	}
-
-	if (nb_frags) {
-		/* In the middle of a packet. Start over from beginning of packet. */
-		idx_rx -= nb_frags;
-		xsk_ring_cons__cancel(&xsk->rx, nb_frags);
-		if (ifobj->use_fill_ring) {
-			idx_fq -= nb_frags;
-			xsk_ring_prod__cancel(&umem->fq, nb_frags);
-		}
-		frags_processed -= nb_frags;
-	}
-
-	if (ifobj->use_fill_ring)
-		xsk_ring_prod__submit(&umem->fq, frags_processed);
-	if (ifobj->release_rx)
-		xsk_ring_cons__release(&xsk->rx, frags_processed);
-
-	pthread_mutex_lock(&pacing_mutex);
-	pkts_in_flight -= pkts_sent;
-	pthread_mutex_unlock(&pacing_mutex);
-	pkts_sent = 0;
-
-return TEST_CONTINUE;
-}
-
-bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
-			  unsigned long *bitmap)
-{
-	struct pkt_stream *pkt_stream = xsk->pkt_stream;
-
-	if (!pkt_stream) {
-		__set_bit(sock_num, bitmap);
-		return false;
-	}
-
-	if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) {
-		__set_bit(sock_num, bitmap);
-		if (bitmap_full(bitmap, test->nb_sockets))
-			return true;
-	}
-
-	return false;
-}
-
-static int receive_pkts(struct test_spec *test)
-{
-	struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
-	DECLARE_BITMAP(bitmap, test->nb_sockets);
-	struct xsk_socket_info *xsk;
-	u32 sock_num = 0;
-	int res, ret;
-
-	ret = gettimeofday(&tv_now, NULL);
-	if (ret)
-		exit_with_error(errno);
-
-	timeradd(&tv_now, &tv_timeout, &tv_end);
-
-	while (1) {
-		xsk = &test->ifobj_rx->xsk_arr[sock_num];
-
-		if ((all_packets_received(test, xsk, sock_num, bitmap)))
-			break;
-
-		res = __receive_pkts(test, xsk);
-		if (!(res == TEST_PASS || res == TEST_CONTINUE))
-			return res;
-
-		ret = gettimeofday(&tv_now, NULL);
-		if (ret)
-			exit_with_error(errno);
-
-		if (timercmp(&tv_now, &tv_end, >)) {
-			ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
-			return TEST_FAILURE;
-		}
-		sock_num = (sock_num + 1) % test->nb_sockets;
-	}
-
-	return TEST_PASS;
-}
-
-static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
-{
-	u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
-	struct pkt_stream *pkt_stream = xsk->pkt_stream;
-	struct xsk_umem_info *umem = ifobject->umem;
-	bool use_poll = ifobject->use_poll;
-	struct pollfd fds = { };
-	int ret;
-
-	buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
-	/* pkts_in_flight might be negative if many invalid packets are sent */
-	if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
-	    buffer_len)) {
-		ret = kick_tx(xsk);
-		if (ret)
-			return TEST_FAILURE;
-		return TEST_CONTINUE;
-	}
-
-	fds.fd = xsk_socket__fd(xsk->xsk);
-	fds.events = POLLOUT;
-
-	while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
-		if (use_poll) {
-			ret = poll(&fds, 1, POLL_TMOUT);
-			if (timeout) {
-				if (ret < 0) {
-					ksft_print_msg("ERROR: [%s] Poll error %d\n",
-						       __func__, errno);
-					return TEST_FAILURE;
-				}
-				if (ret == 0)
-					return TEST_PASS;
-				break;
-			}
-			if (ret <= 0) {
-				ksft_print_msg("ERROR: [%s] Poll error %d\n",
-					       __func__, errno);
-				return TEST_FAILURE;
-			}
-		}
-
-		complete_pkts(xsk, xsk->batch_size);
-	}
-
-	for (i = 0; i < xsk->batch_size; i++) {
-		struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
-		u32 nb_frags_left, nb_frags, bytes_written = 0;
-
-		if (!pkt)
-			break;
-
-		nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
-		if (nb_frags > xsk->batch_size - i) {
-			pkt_stream_cancel(pkt_stream);
-			xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i);
-			break;
-		}
-		nb_frags_left = nb_frags;
-
-		while (nb_frags_left--) {
-			struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
-
-			tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
-			if (pkt_stream->verbatim) {
-				tx_desc->len = pkt->len;
-				tx_desc->options = pkt->options;
-			} else if (nb_frags_left) {
-				tx_desc->len = umem->frame_size;
-				tx_desc->options = XDP_PKT_CONTD;
-			} else {
-				tx_desc->len = pkt->len - bytes_written;
-				tx_desc->options = 0;
-			}
-			if (pkt->valid)
-				pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
-					     bytes_written);
-			bytes_written += tx_desc->len;
-
-			print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
-				      tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
-
-			if (nb_frags_left) {
-				i++;
-				if (pkt_stream->verbatim)
-					pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
-			}
-		}
-
-		if (pkt && pkt->valid) {
-			valid_pkts++;
-			valid_frags += nb_frags;
-		}
-	}
-
-	pthread_mutex_lock(&pacing_mutex);
-	pkts_in_flight += valid_pkts;
-	pthread_mutex_unlock(&pacing_mutex);
-
-	xsk_ring_prod__submit(&xsk->tx, i);
-	xsk->outstanding_tx += valid_frags;
-
-	if (use_poll) {
-		ret = poll(&fds, 1, POLL_TMOUT);
-		if (ret <= 0) {
-			if (ret == 0 && timeout)
-				return TEST_PASS;
-
-			ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
-			return TEST_FAILURE;
-		}
-	}
-
-	if (!timeout) {
-		if (complete_pkts(xsk, i))
-			return TEST_FAILURE;
-
-		usleep(10);
-		return TEST_PASS;
-	}
-
-	return TEST_CONTINUE;
-}
-
-static int wait_for_tx_completion(struct xsk_socket_info *xsk)
-{
-	struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
-	int ret;
-
-	ret = gettimeofday(&tv_now, NULL);
-	if (ret)
-		exit_with_error(errno);
-	timeradd(&tv_now, &tv_timeout, &tv_end);
-
-	while (xsk->outstanding_tx) {
-		ret = gettimeofday(&tv_now, NULL);
-		if (ret)
-			exit_with_error(errno);
-		if (timercmp(&tv_now, &tv_end, >)) {
-			ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
-			return TEST_FAILURE;
-		}
-
-		complete_pkts(xsk, xsk->batch_size);
-	}
-
-	return TEST_PASS;
-}
-
-bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
-{
-	return bitmap_full(bitmap, test->nb_sockets);
-}
-
-static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
-{
-	bool timeout = !is_umem_valid(test->ifobj_rx);
-	DECLARE_BITMAP(bitmap, test->nb_sockets);
-	u32 i, ret;
-
-	while (!(all_packets_sent(test, bitmap))) {
-		for (i = 0; i < test->nb_sockets; i++) {
-			struct pkt_stream *pkt_stream;
-
-			pkt_stream = ifobject->xsk_arr[i].pkt_stream;
-			if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
-				__set_bit(i, bitmap);
-				continue;
-			}
-			ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
-			if (ret == TEST_CONTINUE && !test->fail)
-				continue;
-
-			if ((ret || test->fail) && !timeout)
-				return TEST_FAILURE;
-
-			if (ret == TEST_PASS && timeout)
-				return ret;
-
-			ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
-			if (ret)
-				return TEST_FAILURE;
-		}
-	}
-
-	return TEST_PASS;
-}
-
-static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
-{
-	int fd = xsk_socket__fd(xsk), err;
-	socklen_t optlen, expected_len;
-
-	optlen = sizeof(*stats);
-	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
-	if (err) {
-		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
-			       __func__, -err, strerror(-err));
-		return TEST_FAILURE;
-	}
-
-	expected_len = sizeof(struct xdp_statistics);
-	if (optlen != expected_len) {
-		ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
-			       __func__, expected_len, optlen);
-		return TEST_FAILURE;
-	}
-
-	return TEST_PASS;
-}
-
-static int validate_rx_dropped(struct ifobject *ifobject)
-{
-	struct xsk_socket *xsk = ifobject->xsk->xsk;
-	struct xdp_statistics stats;
-	int err;
-
-	err = kick_rx(ifobject->xsk);
-	if (err)
-		return TEST_FAILURE;
-
-	err = get_xsk_stats(xsk, &stats);
-	if (err)
-		return TEST_FAILURE;
-
-	/* The receiver calls getsockopt after receiving the last (valid)
-	 * packet which is not the final packet sent in this test (valid and
-	 * invalid packets are sent in alternating fashion with the final
-	 * packet being invalid). Since the last packet may or may not have
-	 * been dropped already, both outcomes must be allowed.
-	 */
-	if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
-	    stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
-		return TEST_PASS;
-
-	return TEST_FAILURE;
-}
-
-static int validate_rx_full(struct ifobject *ifobject)
-{
-	struct xsk_socket *xsk = ifobject->xsk->xsk;
-	struct xdp_statistics stats;
-	int err;
-
-	usleep(1000);
-	err = kick_rx(ifobject->xsk);
-	if (err)
-		return TEST_FAILURE;
-
-	err = get_xsk_stats(xsk, &stats);
-	if (err)
-		return TEST_FAILURE;
-
-	if (stats.rx_ring_full)
-		return TEST_PASS;
-
-	return TEST_FAILURE;
-}
-
-static int validate_fill_empty(struct ifobject *ifobject)
-{
-	struct xsk_socket *xsk = ifobject->xsk->xsk;
-	struct xdp_statistics stats;
-	int err;
-
-	usleep(1000);
-	err = kick_rx(ifobject->xsk);
-	if (err)
-		return TEST_FAILURE;
-
-	err = get_xsk_stats(xsk, &stats);
-	if (err)
-		return TEST_FAILURE;
-
-	if (stats.rx_fill_ring_empty_descs)
-		return TEST_PASS;
-
-	return TEST_FAILURE;
-}
-
-static int validate_tx_invalid_descs(struct ifobject *ifobject)
-{
-	struct xsk_socket *xsk = ifobject->xsk->xsk;
-	int fd = xsk_socket__fd(xsk);
-	struct xdp_statistics stats;
-	socklen_t optlen;
-	int err;
-
-	optlen = sizeof(stats);
-	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
-	if (err) {
-		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
-			       __func__, -err, strerror(-err));
-		return TEST_FAILURE;
-	}
-
-	if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
-		ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
-			       __func__,
-			       (unsigned long long)stats.tx_invalid_descs,
-			       ifobject->xsk->pkt_stream->nb_pkts);
-		return TEST_FAILURE;
-	}
-
-	return TEST_PASS;
-}
-
-static void xsk_configure_socket(struct test_spec *test, struct ifobject *ifobject,
-				 struct xsk_umem_info *umem, bool tx)
-{
-	int i, ret;
-
-	for (i = 0; i < test->nb_sockets; i++) {
-		bool shared = (ifobject->shared_umem && tx) ? true : !!i;
-		u32 ctr = 0;
-
-		while (ctr++ < SOCK_RECONF_CTR) {
-			ret = __xsk_configure_socket(&ifobject->xsk_arr[i], umem,
-						     ifobject, shared);
-			if (!ret)
-				break;
-
-			/* Retry if it fails as xsk_socket__create() is asynchronous */
-			if (ctr >= SOCK_RECONF_CTR)
-				exit_with_error(-ret);
-			usleep(USLEEP_MAX);
-		}
-		if (ifobject->busy_poll)
-			enable_busy_poll(&ifobject->xsk_arr[i]);
-	}
-}
-
-static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject)
-{
-	xsk_configure_socket(test, ifobject, test->ifobj_rx->umem, true);
-	ifobject->xsk = &ifobject->xsk_arr[0];
-	ifobject->xskmap = test->ifobj_rx->xskmap;
-	memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
-	ifobject->umem->base_addr = 0;
-}
-
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
-				   bool fill_up)
-{
-	u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
-	u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
-	int ret;
-
-	if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
-		buffers_to_fill = umem->num_frames;
-	else
-		buffers_to_fill = umem->fill_size;
-
-	ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
-	if (ret != buffers_to_fill)
-		exit_with_error(ENOSPC);
-
-	while (filled < buffers_to_fill) {
-		struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
-		u64 addr;
-		u32 i;
-
-		for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
-			if (!pkt) {
-				if (!fill_up)
-					break;
-				addr = filled * umem->frame_size + umem->base_addr;
-			} else if (pkt->offset >= 0) {
-				addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
-			} else {
-				addr = pkt->offset + umem_alloc_buffer(umem);
-			}
-
-			*xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
-			if (++filled >= buffers_to_fill)
-				break;
-		}
-	}
-	xsk_ring_prod__submit(&umem->fq, filled);
-	xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
-
-	pkt_stream_reset(pkt_stream);
-	umem_reset_alloc(umem);
-}
-
-static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
-{
-	u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
-	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
-	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
-	void *bufs;
-	int ret;
-	u32 i;
-
-	if (ifobject->umem->unaligned_mode)
-		mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
-
-	if (ifobject->shared_umem)
-		umem_sz *= 2;
-
-	bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
-	if (bufs == MAP_FAILED)
-		exit_with_error(errno);
-
-	ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
-	if (ret)
-		exit_with_error(-ret);
-
-	xsk_configure_socket(test, ifobject, ifobject->umem, false);
-
-	ifobject->xsk = &ifobject->xsk_arr[0];
-
-	if (!ifobject->rx_on)
-		return;
-
-	xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring);
-
-	for (i = 0; i < test->nb_sockets; i++) {
-		ifobject->xsk = &ifobject->xsk_arr[i];
-		ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
-		if (ret)
-			exit_with_error(errno);
-	}
-}
-
-static void *worker_testapp_validate_tx(void *arg)
-{
-	struct test_spec *test = (struct test_spec *)arg;
-	struct ifobject *ifobject = test->ifobj_tx;
-	int err;
-
-	if (test->current_step == 1) {
-		if (!ifobject->shared_umem)
-			thread_common_ops(test, ifobject);
-		else
-			thread_common_ops_tx(test, ifobject);
-	}
-
-	err = send_pkts(test, ifobject);
-
-	if (!err && ifobject->validation_func)
-		err = ifobject->validation_func(ifobject);
-	if (err)
-		report_failure(test);
-
-	pthread_exit(NULL);
-}
-
-static void *worker_testapp_validate_rx(void *arg)
-{
-	struct test_spec *test = (struct test_spec *)arg;
-	struct ifobject *ifobject = test->ifobj_rx;
-	int err;
-
-	if (test->current_step == 1) {
-		thread_common_ops(test, ifobject);
-	} else {
-		xsk_clear_xskmap(ifobject->xskmap);
-		err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
-		if (err) {
-			ksft_print_msg("Error: Failed to update xskmap, error %s\n",
-				       strerror(-err));
-			exit_with_error(-err);
-		}
-	}
-
-	pthread_barrier_wait(&barr);
-
-	err = receive_pkts(test);
-
-	if (!err && ifobject->validation_func)
-		err = ifobject->validation_func(ifobject);
-
-	if (err) {
-		if (test->adjust_tail && !is_adjust_tail_supported(ifobject->xdp_progs))
-			test->adjust_tail_support = false;
-		else
-			report_failure(test);
-	}
-
-	pthread_exit(NULL);
-}
-
-static u64 ceil_u64(u64 a, u64 b)
-{
-	return (a + b - 1) / b;
-}
-
-static void testapp_clean_xsk_umem(struct ifobject *ifobj)
-{
-	u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size;
-
-	if (ifobj->shared_umem)
-		umem_sz *= 2;
-
-	umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
-	xsk_umem__delete(ifobj->umem->umem);
-	munmap(ifobj->umem->buffer, umem_sz);
-}
-
-static void handler(int signum)
-{
-	pthread_exit(NULL);
-}
-
-static bool xdp_prog_changed_rx(struct test_spec *test)
-{
-	struct ifobject *ifobj = test->ifobj_rx;
-
-	return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
-}
-
-static bool xdp_prog_changed_tx(struct test_spec *test)
-{
-	struct ifobject *ifobj = test->ifobj_tx;
-
-	return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
-}
-
-static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
-			     struct bpf_map *xskmap, enum test_mode mode)
-{
-	int err;
-
-	xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
-	err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
-	if (err) {
-		ksft_print_msg("Error attaching XDP program\n");
-		exit_with_error(-err);
-	}
-
-	if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC))
-		if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) {
-			ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
-			exit_with_error(EINVAL);
-		}
-
-	ifobj->xdp_prog = xdp_prog;
-	ifobj->xskmap = xskmap;
-	ifobj->mode = mode;
-}
-
-static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
-				 struct ifobject *ifobj_tx)
-{
-	if (xdp_prog_changed_rx(test))
-		xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
-
-	if (!ifobj_tx || ifobj_tx->shared_umem)
-		return;
-
-	if (xdp_prog_changed_tx(test))
-		xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
-}
-
-static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
-				      struct ifobject *ifobj2)
-{
-	pthread_t t0, t1;
-	int err;
-
-	if (test->mtu > MAX_ETH_PKT_SIZE) {
-		if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
-						   (ifobj2 && !ifobj2->multi_buff_zc_supp))) {
-			ksft_test_result_skip("Multi buffer for zero-copy not supported.\n");
-			return TEST_SKIP;
-		}
-		if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
-						   (ifobj2 && !ifobj2->multi_buff_supp))) {
-			ksft_test_result_skip("Multi buffer not supported.\n");
-			return TEST_SKIP;
-		}
-	}
-	err = test_spec_set_mtu(test, test->mtu);
-	if (err) {
-		ksft_print_msg("Error, could not set mtu.\n");
-		exit_with_error(err);
-	}
-
-	if (ifobj2) {
-		if (pthread_barrier_init(&barr, NULL, 2))
-			exit_with_error(errno);
-		pkt_stream_reset(ifobj2->xsk->pkt_stream);
-	}
-
-	test->current_step++;
-	pkt_stream_reset(ifobj1->xsk->pkt_stream);
-	pkts_in_flight = 0;
-
-	signal(SIGUSR1, handler);
-	/*Spawn RX thread */
-	pthread_create(&t0, NULL, ifobj1->func_ptr, test);
-
-	if (ifobj2) {
-		pthread_barrier_wait(&barr);
-		if (pthread_barrier_destroy(&barr))
-			exit_with_error(errno);
-
-		/*Spawn TX thread */
-		pthread_create(&t1, NULL, ifobj2->func_ptr, test);
-
-		pthread_join(t1, NULL);
-	}
-
-	if (!ifobj2)
-		pthread_kill(t0, SIGUSR1);
-	else
-		pthread_join(t0, NULL);
-
-	if (test->total_steps == test->current_step || test->fail) {
-		u32 i;
-
-		if (ifobj2)
-			for (i = 0; i < test->nb_sockets; i++)
-				xsk_socket__delete(ifobj2->xsk_arr[i].xsk);
-
-		for (i = 0; i < test->nb_sockets; i++)
-			xsk_socket__delete(ifobj1->xsk_arr[i].xsk);
-
-		testapp_clean_xsk_umem(ifobj1);
-		if (ifobj2 && !ifobj2->shared_umem)
-			testapp_clean_xsk_umem(ifobj2);
-	}
-
-	return !!test->fail;
-}
-
-static int testapp_validate_traffic(struct test_spec *test)
-{
-	struct ifobject *ifobj_rx = test->ifobj_rx;
-	struct ifobject *ifobj_tx = test->ifobj_tx;
-
-	if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
-	    (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
-		ksft_test_result_skip("No huge pages present.\n");
-		return TEST_SKIP;
-	}
-
-	if (test->set_ring) {
-		if (ifobj_tx->hw_ring_size_supp) {
-			if (set_ring_size(ifobj_tx)) {
-				ksft_test_result_skip("Failed to change HW ring size.\n");
-				return TEST_FAILURE;
-			}
-		} else {
-			ksft_test_result_skip("Changing HW ring size not supported.\n");
-			return TEST_SKIP;
-		}
-	}
-
-	xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
-	return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
-}
-
-static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
-{
-	return __testapp_validate_traffic(test, ifobj, NULL);
-}
-
-static int testapp_teardown(struct test_spec *test)
-{
-	int i;
-
-	for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
-		if (testapp_validate_traffic(test))
-			return TEST_FAILURE;
-		test_spec_reset(test);
-	}
-
-	return TEST_PASS;
-}
-
-static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
-{
-	thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
-	struct ifobject *tmp_ifobj = (*ifobj1);
-
-	(*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
-	(*ifobj2)->func_ptr = tmp_func_ptr;
-
-	*ifobj1 = *ifobj2;
-	*ifobj2 = tmp_ifobj;
-}
-
-static int testapp_bidirectional(struct test_spec *test)
-{
-	int res;
-
-	test->ifobj_tx->rx_on = true;
-	test->ifobj_rx->tx_on = true;
-	test->total_steps = 2;
-	if (testapp_validate_traffic(test))
-		return TEST_FAILURE;
-
-	print_verbose("Switching Tx/Rx direction\n");
-	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
-	res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
-
-	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
-	return res;
-}
-
-static int swap_xsk_resources(struct test_spec *test)
-{
-	int ret;
-
-	test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
-	test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
-	test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
-	test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
-	test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
-	test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
-
-	ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
-	if (ret)
-		return TEST_FAILURE;
-
-	return TEST_PASS;
-}
-
-static int testapp_xdp_prog_cleanup(struct test_spec *test)
-{
-	test->total_steps = 2;
-	test->nb_sockets = 2;
-	if (testapp_validate_traffic(test))
-		return TEST_FAILURE;
-
-	if (swap_xsk_resources(test))
-		return TEST_FAILURE;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_headroom(struct test_spec *test)
-{
-	test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_rx_dropped(struct test_spec *test)
-{
-	if (test->mode == TEST_MODE_ZC) {
-		ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n");
-		return TEST_SKIP;
-	}
-
-	pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
-	test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
-		XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
-	pkt_stream_receive_half(test);
-	test->ifobj_rx->validation_func = validate_rx_dropped;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_tx_invalid_descs(struct test_spec *test)
-{
-	pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
-	test->ifobj_tx->validation_func = validate_tx_invalid_descs;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_rx_full(struct test_spec *test)
-{
-	pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
-	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
-
-	test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
-	test->ifobj_rx->release_rx = false;
-	test->ifobj_rx->validation_func = validate_rx_full;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_fill_empty(struct test_spec *test)
-{
-	pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
-	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
-
-	test->ifobj_rx->use_fill_ring = false;
-	test->ifobj_rx->validation_func = validate_fill_empty;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_unaligned(struct test_spec *test)
-{
-	test->ifobj_tx->umem->unaligned_mode = true;
-	test->ifobj_rx->umem->unaligned_mode = true;
-	/* Let half of the packets straddle a 4K buffer boundary */
-	pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2);
-
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_unaligned_mb(struct test_spec *test)
-{
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-	test->ifobj_tx->umem->unaligned_mode = true;
-	test->ifobj_rx->umem->unaligned_mode = true;
-	pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_single_pkt(struct test_spec *test)
-{
-	struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
-
-	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_mb(struct test_spec *test)
-{
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-	pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
-
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_invalid_desc_mb(struct test_spec *test)
-{
-	struct xsk_umem_info *umem = test->ifobj_tx->umem;
-	u64 umem_size = umem->num_frames * umem->frame_size;
-	struct pkt pkts[] = {
-		/* Valid packet for synch to start with */
-		{0, MIN_PKT_SIZE, 0, true, 0},
-		/* Zero frame len is not legal */
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		{0, 0, 0, false, 0},
-		/* Invalid address in the second frame */
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		{umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		/* Invalid len in the middle */
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		{0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		/* Invalid options in the middle */
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
-		/* Transmit 2 frags, receive 3 */
-		{0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
-		{0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
-		/* Middle frame crosses chunk boundary with small length */
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		{-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
-		/* Valid packet for synch so that something is received */
-		{0, MIN_PKT_SIZE, 0, true, 0}};
-
-	if (umem->unaligned_mode) {
-		/* Crossing a chunk boundary allowed */
-		pkts[12].valid = true;
-		pkts[13].valid = true;
-	}
-
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_invalid_desc(struct test_spec *test)
-{
-	struct xsk_umem_info *umem = test->ifobj_tx->umem;
-	u64 umem_size = umem->num_frames * umem->frame_size;
-	struct pkt pkts[] = {
-		/* Zero packet address allowed */
-		{0, MIN_PKT_SIZE, 0, true},
-		/* Allowed packet */
-		{0, MIN_PKT_SIZE, 0, true},
-		/* Straddling the start of umem */
-		{-2, MIN_PKT_SIZE, 0, false},
-		/* Packet too large */
-		{0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
-		/* Up to end of umem allowed */
-		{umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
-		/* After umem ends */
-		{umem_size, MIN_PKT_SIZE, 0, false},
-		/* Straddle the end of umem */
-		{umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
-		/* Straddle a 4K boundary */
-		{0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
-		/* Straddle a 2K boundary */
-		{0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
-		/* Valid packet for synch so that something is received */
-		{0, MIN_PKT_SIZE, 0, true}};
-
-	if (umem->unaligned_mode) {
-		/* Crossing a page boundary allowed */
-		pkts[7].valid = true;
-	}
-	if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
-		/* Crossing a 2K frame size boundary not allowed */
-		pkts[8].valid = false;
-	}
-
-	if (test->ifobj_tx->shared_umem) {
-		pkts[4].offset += umem_size;
-		pkts[5].offset += umem_size;
-		pkts[6].offset += umem_size;
-	}
-
-	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_drop(struct test_spec *test)
-{
-	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
-	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
-	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
-			       skel_rx->maps.xsk, skel_tx->maps.xsk);
-
-	pkt_stream_receive_half(test);
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_metadata_copy(struct test_spec *test)
-{
-	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
-	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
-	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
-			       skel_tx->progs.xsk_xdp_populate_metadata,
-			       skel_rx->maps.xsk, skel_tx->maps.xsk);
-	test->ifobj_rx->use_metadata = true;
-
-	skel_rx->bss->count = 0;
-
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_shared_umem(struct test_spec *test)
-{
-	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
-	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
-	test->total_steps = 1;
-	test->nb_sockets = 2;
-
-	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
-			       skel_tx->progs.xsk_xdp_shared_umem,
-			       skel_rx->maps.xsk, skel_tx->maps.xsk);
-
-	pkt_stream_even_odd_sequence(test);
-
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_txq_tmout(struct test_spec *test)
-{
-	test->ifobj_tx->use_poll = true;
-	/* create invalid frame by set umem frame_size and pkt length equal to 2048 */
-	test->ifobj_tx->umem->frame_size = 2048;
-	pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
-	return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
-}
-
-static int testapp_poll_rxq_tmout(struct test_spec *test)
-{
-	test->ifobj_rx->use_poll = true;
-	return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
-}
-
-static int testapp_too_many_frags(struct test_spec *test)
-{
-	struct pkt *pkts;
-	u32 max_frags, i;
-	int ret;
-
-	if (test->mode == TEST_MODE_ZC) {
-		max_frags = test->ifobj_tx->xdp_zc_max_segs;
-	} else {
-		max_frags = get_max_skb_frags();
-		if (!max_frags) {
-			ksft_print_msg("Couldn't retrieve MAX_SKB_FRAGS from system, using default (17) value\n");
-			max_frags = 17;
-		}
-		max_frags += 1;
-	}
-
-	pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
-	if (!pkts)
-		return TEST_FAILURE;
-
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-
-	/* Valid packet for synch */
-	pkts[0].len = MIN_PKT_SIZE;
-	pkts[0].valid = true;
-
-	/* One valid packet with the max amount of frags */
-	for (i = 1; i < max_frags + 1; i++) {
-		pkts[i].len = MIN_PKT_SIZE;
-		pkts[i].options = XDP_PKT_CONTD;
-		pkts[i].valid = true;
-	}
-	pkts[max_frags].options = 0;
-
-	/* An invalid packet with the max amount of frags but signals packet
-	 * continues on the last frag
-	 */
-	for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
-		pkts[i].len = MIN_PKT_SIZE;
-		pkts[i].options = XDP_PKT_CONTD;
-		pkts[i].valid = false;
-	}
-
-	/* Valid packet for synch */
-	pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
-	pkts[2 * max_frags + 1].valid = true;
-
-	pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
-	ret = testapp_validate_traffic(test);
-
-	free(pkts);
-	return ret;
-}
-
-static int xsk_load_xdp_programs(struct ifobject *ifobj)
-{
-	ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
-	if (libbpf_get_error(ifobj->xdp_progs))
-		return libbpf_get_error(ifobj->xdp_progs);
-
-	return 0;
-}
-
 static void xsk_unload_xdp_programs(struct ifobject *ifobj)
 {
 	xsk_xdp_progs__destroy(ifobj->xdp_progs);
 }
 
-/* Simple test */
-static bool hugepages_present(void)
-{
-	size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
-	void *bufs;
-
-	bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
-		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB);
-	if (bufs == MAP_FAILED)
-		return false;
-
-	mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
-	munmap(bufs, mmap_sz);
-	return true;
-}
-
-static void init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
-{
-	LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
-	int err;
-
-	ifobj->func_ptr = func_ptr;
-
-	err = xsk_load_xdp_programs(ifobj);
-	if (err) {
-		ksft_print_msg("Error loading XDP program\n");
-		exit_with_error(err);
-	}
-
-	if (hugepages_present())
-		ifobj->unaligned_supp = true;
-
-	err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
-	if (err) {
-		ksft_print_msg("Error querying XDP capabilities\n");
-		exit_with_error(-err);
-	}
-	if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
-		ifobj->multi_buff_supp = true;
-	if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
-		if (query_opts.xdp_zc_max_segs > 1) {
-			ifobj->multi_buff_zc_supp = true;
-			ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
-		} else {
-			ifobj->xdp_zc_max_segs = 0;
-		}
-	}
-}
-
-static int testapp_send_receive(struct test_spec *test)
-{
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_2k_frame(struct test_spec *test)
-{
-	test->ifobj_tx->umem->frame_size = 2048;
-	test->ifobj_rx->umem->frame_size = 2048;
-	pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_rx(struct test_spec *test)
-{
-	test->ifobj_rx->use_poll = true;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_tx(struct test_spec *test)
-{
-	test->ifobj_tx->use_poll = true;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_aligned_inv_desc(struct test_spec *test)
-{
-	return testapp_invalid_desc(test);
-}
-
-static int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
-{
-	test->ifobj_tx->umem->frame_size = 2048;
-	test->ifobj_rx->umem->frame_size = 2048;
-	return testapp_invalid_desc(test);
-}
-
-static int testapp_unaligned_inv_desc(struct test_spec *test)
-{
-	test->ifobj_tx->umem->unaligned_mode = true;
-	test->ifobj_rx->umem->unaligned_mode = true;
-	return testapp_invalid_desc(test);
-}
-
-static int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
-{
-	u64 page_size, umem_size;
-
-	/* Odd frame size so the UMEM doesn't end near a page boundary. */
-	test->ifobj_tx->umem->frame_size = 4001;
-	test->ifobj_rx->umem->frame_size = 4001;
-	test->ifobj_tx->umem->unaligned_mode = true;
-	test->ifobj_rx->umem->unaligned_mode = true;
-	/* This test exists to test descriptors that staddle the end of
-	 * the UMEM but not a page.
-	 */
-	page_size = sysconf(_SC_PAGESIZE);
-	umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
-	assert(umem_size % page_size > MIN_PKT_SIZE);
-	assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
-
-	return testapp_invalid_desc(test);
-}
-
-static int testapp_aligned_inv_desc_mb(struct test_spec *test)
-{
-	return testapp_invalid_desc_mb(test);
-}
-
-static int testapp_unaligned_inv_desc_mb(struct test_spec *test)
-{
-	test->ifobj_tx->umem->unaligned_mode = true;
-	test->ifobj_rx->umem->unaligned_mode = true;
-	return testapp_invalid_desc_mb(test);
-}
-
-static int testapp_xdp_metadata(struct test_spec *test)
-{
-	return testapp_xdp_metadata_copy(test);
-}
-
-static int testapp_xdp_metadata_mb(struct test_spec *test)
-{
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-	return testapp_xdp_metadata_copy(test);
-}
-
-static int testapp_hw_sw_min_ring_size(struct test_spec *test)
-{
-	int ret;
-
-	test->set_ring = true;
-	test->total_steps = 2;
-	test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
-	test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
-	test->ifobj_tx->xsk->batch_size = 1;
-	test->ifobj_rx->xsk->batch_size = 1;
-	ret = testapp_validate_traffic(test);
-	if (ret)
-		return ret;
-
-	/* Set batch size to hw_ring_size - 1 */
-	test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
-	test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_hw_sw_max_ring_size(struct test_spec *test)
-{
-	u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
-	int ret;
-
-	test->set_ring = true;
-	test->total_steps = 2;
-	test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
-	test->ifobj_tx->ring.rx_pending  = test->ifobj_tx->ring.rx_max_pending;
-	test->ifobj_rx->umem->num_frames = max_descs;
-	test->ifobj_rx->umem->fill_size = max_descs;
-	test->ifobj_rx->umem->comp_size = max_descs;
-	test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-	test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-
-	ret = testapp_validate_traffic(test);
-	if (ret)
-		return ret;
-
-	/* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
-	 * updating the Rx HW tail register.
-	 */
-	test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
-	test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
-	pkt_stream_replace(test, max_descs, MIN_PKT_SIZE);
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
-{
-	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
-	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
-	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail,
-			       skel_tx->progs.xsk_xdp_adjust_tail,
-			       skel_rx->maps.xsk, skel_tx->maps.xsk);
-
-	skel_rx->bss->adjust_value = adjust_value;
-
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
-{
-	int ret;
-
-	test->adjust_tail_support = true;
-	test->adjust_tail = true;
-	test->total_steps = 1;
-
-	pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
-	pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
-
-	ret = testapp_xdp_adjust_tail(test, value);
-	if (ret)
-		return ret;
-
-	if (!test->adjust_tail_support) {
-		ksft_test_result_skip("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
-				      mode_string(test), busy_poll_string(test));
-		return TEST_SKIP;
-	}
-
-	return 0;
-}
-
-static int testapp_adjust_tail_shrink(struct test_spec *test)
-{
-	/* Shrink by 4 bytes for testing purpose */
-	return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
-}
-
-static int testapp_adjust_tail_shrink_mb(struct test_spec *test)
-{
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-	/* Shrink by the frag size */
-	return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
-}
-
-static int testapp_adjust_tail_grow(struct test_spec *test)
-{
-	/* Grow by 4 bytes for testing purpose */
-	return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
-}
-
-static int testapp_adjust_tail_grow_mb(struct test_spec *test)
-{
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-	/* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */
-	return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
-				   XSK_UMEM__LARGE_FRAME_SIZE * 2);
-}
-
-static int testapp_tx_queue_consumer(struct test_spec *test)
-{
-	int nr_packets;
-
-	if (test->mode == TEST_MODE_ZC) {
-		ksft_test_result_skip("Can not run TX_QUEUE_CONSUMER test for ZC mode\n");
-		return TEST_SKIP;
-	}
-
-	nr_packets = MAX_TX_BUDGET_DEFAULT + 1;
-	pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE);
-	test->ifobj_tx->xsk->batch_size = nr_packets;
-	test->ifobj_tx->xsk->check_consumer = true;
-
-	return testapp_validate_traffic(test);
-}
-
 static void run_pkt_test(struct test_spec *test)
 {
 	int ret;
 
 	ret = test->test_func(test);
 
-	if (ret == TEST_PASS)
+	switch (ret) {
+	case TEST_PASS:
 		ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
 				      test->name);
-	pkt_stream_restore_default(test);
-}
-
-static struct ifobject *ifobject_create(void)
-{
-	struct ifobject *ifobj;
-
-	ifobj = calloc(1, sizeof(struct ifobject));
-	if (!ifobj)
-		return NULL;
-
-	ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
-	if (!ifobj->xsk_arr)
-		goto out_xsk_arr;
-
-	ifobj->umem = calloc(1, sizeof(*ifobj->umem));
-	if (!ifobj->umem)
-		goto out_umem;
-
-	return ifobj;
-
-out_umem:
-	free(ifobj->xsk_arr);
-out_xsk_arr:
-	free(ifobj);
-	return NULL;
-}
+		break;
+	case TEST_SKIP:
+		ksft_test_result_skip("SKIP: %s %s%s\n", mode_string(test), busy_poll_string(test),
+				      test->name);
+		break;
+	case TEST_FAILURE:
+		ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
+				      test->name);
+		break;
+	default:
+		ksft_test_result_fail("FAIL: %s %s%s -- Unexpected returned value (%d)\n",
+				      mode_string(test), busy_poll_string(test), test->name, ret);
+	}
 
-static void ifobject_delete(struct ifobject *ifobj)
-{
-	free(ifobj->umem);
-	free(ifobj->xsk_arr);
-	free(ifobj);
+	pkt_stream_restore_default(test);
 }
 
 static bool is_xdp_supported(int ifindex)
@@ -2726,47 +319,6 @@ static bool is_xdp_supported(int ifindex)
 	return true;
 }
 
-static const struct test_spec tests[] = {
-	{.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
-	{.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
-	{.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
-	{.name = "POLL_RX", .test_func = testapp_poll_rx},
-	{.name = "POLL_TX", .test_func = testapp_poll_tx},
-	{.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
-	{.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
-	{.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
-	{.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
-	{.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
-	{.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
-	{.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
-	 .test_func = testapp_unaligned_inv_desc_4001_frame},
-	{.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
-	{.name = "TEARDOWN", .test_func = testapp_teardown},
-	{.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
-	{.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
-	{.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
-	{.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
-	{.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
-	{.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
-	{.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
-	{.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
-	{.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
-	{.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
-	{.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
-	{.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
-	 .test_func = testapp_send_receive_unaligned_mb},
-	{.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
-	{.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
-	{.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
-	{.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
-	{.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
-	{.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
-	{.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
-	{.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
-	{.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
-	{.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer},
-	};
-
 static void print_tests(void)
 {
 	u32 i;
@@ -2774,10 +326,13 @@ static void print_tests(void)
 	printf("Tests:\n");
 	for (i = 0; i < ARRAY_SIZE(tests); i++)
 		printf("%u: %s\n", i, tests[i].name);
+	for (i = ARRAY_SIZE(tests); i < ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests); i++)
+		printf("%u: %s\n", i, ci_skip_tests[i - ARRAY_SIZE(tests)].name);
 }
 
 int main(int argc, char **argv)
 {
+	const size_t total_tests = ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests);
 	struct pkt_stream *rx_pkt_stream_default;
 	struct pkt_stream *tx_pkt_stream_default;
 	struct ifobject *ifobj_tx, *ifobj_rx;
@@ -2805,7 +360,7 @@ int main(int argc, char **argv)
 		print_tests();
 		ksft_exit_xpass();
 	}
-	if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= ARRAY_SIZE(tests)) {
+	if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= total_tests) {
 		ksft_print_msg("Error: test %u does not exist.\n", opt_run_test);
 		ksft_exit_xfail();
 	}
@@ -2830,10 +385,13 @@ int main(int argc, char **argv)
 		ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending;
 	}
 
-	init_iface(ifobj_rx, worker_testapp_validate_rx);
-	init_iface(ifobj_tx, worker_testapp_validate_tx);
+	if (init_iface(ifobj_rx, worker_testapp_validate_rx) ||
+	    init_iface(ifobj_tx, worker_testapp_validate_tx)) {
+		ksft_print_msg("Error : can't initialize interfaces\n");
+		ksft_exit_xfail();
+	}
 
-	test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+	test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
 	tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
 	rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
 	if (!tx_pkt_stream_default || !rx_pkt_stream_default)
@@ -2842,7 +400,7 @@ int main(int argc, char **argv)
 	test.rx_pkt_stream_default = rx_pkt_stream_default;
 
 	if (opt_run_test == RUN_ALL_TESTS)
-		nb_tests = ARRAY_SIZE(tests);
+		nb_tests = total_tests;
 	else
 		nb_tests = 1;
 	if (opt_mode == TEST_MODE_ALL) {
@@ -2864,11 +422,15 @@ int main(int argc, char **argv)
 		if (opt_mode != TEST_MODE_ALL && i != opt_mode)
 			continue;
 
-		for (j = 0; j < ARRAY_SIZE(tests); j++) {
+		for (j = 0; j < total_tests; j++) {
 			if (opt_run_test != RUN_ALL_TESTS && j != opt_run_test)
 				continue;
 
-			test_spec_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+			if (j < ARRAY_SIZE(tests))
+				test_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+			else
+				test_init(&test, ifobj_tx, ifobj_rx, i,
+					  &ci_skip_tests[j - ARRAY_SIZE(tests)]);
 			run_pkt_test(&test);
 			usleep(USLEEP_MAX);
 
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 4df3a5d329ac..3ca518df23ad 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -22,169 +22,13 @@
 #define PF_XDP AF_XDP
 #endif
 
-#ifndef SO_BUSY_POLL_BUDGET
-#define SO_BUSY_POLL_BUDGET 70
-#endif
-
-#ifndef SO_PREFER_BUSY_POLL
-#define SO_PREFER_BUSY_POLL 69
-#endif
-
-#define TEST_PASS 0
-#define TEST_FAILURE -1
-#define TEST_CONTINUE 1
-#define TEST_SKIP 2
-#define MAX_INTERFACES 2
-#define MAX_INTERFACE_NAME_CHARS 16
-#define MAX_TEST_NAME_SIZE 48
 #define MAX_TEARDOWN_ITER 10
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
-#define MIN_PKT_SIZE 64
-#define MAX_ETH_PKT_SIZE 1518
 #define MAX_ETH_JUMBO_SIZE 9000
-#define USLEEP_MAX 10000
 #define SOCK_RECONF_CTR 10
-#define DEFAULT_BATCH_SIZE 64
-#define POLL_TMOUT 1000
-#define THREAD_TMOUT 3
-#define DEFAULT_PKT_CNT (4 * 1024)
-#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
 #define RX_FULL_RXQSIZE 32
 #define UMEM_HEADROOM_TEST_SIZE 128
 #define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
-#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
-#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
-#define XSK_DESC__INVALID_OPTION (0xffff)
-#define HUGEPAGE_SIZE (2 * 1024 * 1024)
-#define PKT_DUMP_NB_TO_PRINT 16
 #define RUN_ALL_TESTS UINT_MAX
 #define NUM_MAC_ADDRESSES 4
 
-#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
-
-enum test_mode {
-	TEST_MODE_SKB,
-	TEST_MODE_DRV,
-	TEST_MODE_ZC,
-	TEST_MODE_ALL
-};
-
-struct xsk_umem_info {
-	struct xsk_ring_prod fq;
-	struct xsk_ring_cons cq;
-	struct xsk_umem *umem;
-	u64 next_buffer;
-	u32 num_frames;
-	u32 frame_headroom;
-	void *buffer;
-	u32 frame_size;
-	u32 base_addr;
-	u32 fill_size;
-	u32 comp_size;
-	bool unaligned_mode;
-};
-
-struct xsk_socket_info {
-	struct xsk_ring_cons rx;
-	struct xsk_ring_prod tx;
-	struct xsk_umem_info *umem;
-	struct xsk_socket *xsk;
-	struct pkt_stream *pkt_stream;
-	u32 outstanding_tx;
-	u32 rxqsize;
-	u32 batch_size;
-	u8 dst_mac[ETH_ALEN];
-	u8 src_mac[ETH_ALEN];
-	bool check_consumer;
-};
-
-struct pkt {
-	int offset;
-	u32 len;
-	u32 pkt_nb;
-	bool valid;
-	u16 options;
-};
-
-struct pkt_stream {
-	u32 nb_pkts;
-	u32 current_pkt_nb;
-	struct pkt *pkts;
-	u32 max_pkt_len;
-	u32 nb_rx_pkts;
-	u32 nb_valid_entries;
-	bool verbatim;
-};
-
-struct set_hw_ring {
-	u32 default_tx;
-	u32 default_rx;
-};
-
-struct ifobject;
-struct test_spec;
-typedef int (*validation_func_t)(struct ifobject *ifobj);
-typedef void *(*thread_func_t)(void *arg);
-typedef int (*test_func_t)(struct test_spec *test);
-
-struct ifobject {
-	char ifname[MAX_INTERFACE_NAME_CHARS];
-	struct xsk_socket_info *xsk;
-	struct xsk_socket_info *xsk_arr;
-	struct xsk_umem_info *umem;
-	thread_func_t func_ptr;
-	validation_func_t validation_func;
-	struct xsk_xdp_progs *xdp_progs;
-	struct bpf_map *xskmap;
-	struct bpf_program *xdp_prog;
-	struct ethtool_ringparam ring;
-	struct set_hw_ring set_ring;
-	enum test_mode mode;
-	int ifindex;
-	int mtu;
-	u32 bind_flags;
-	u32 xdp_zc_max_segs;
-	bool tx_on;
-	bool rx_on;
-	bool use_poll;
-	bool busy_poll;
-	bool use_fill_ring;
-	bool release_rx;
-	bool shared_umem;
-	bool use_metadata;
-	bool unaligned_supp;
-	bool multi_buff_supp;
-	bool multi_buff_zc_supp;
-	bool hw_ring_size_supp;
-};
-
-struct test_spec {
-	struct ifobject *ifobj_tx;
-	struct ifobject *ifobj_rx;
-	struct pkt_stream *tx_pkt_stream_default;
-	struct pkt_stream *rx_pkt_stream_default;
-	struct bpf_program *xdp_prog_rx;
-	struct bpf_program *xdp_prog_tx;
-	struct bpf_map *xskmap_rx;
-	struct bpf_map *xskmap_tx;
-	test_func_t test_func;
-	int mtu;
-	u16 total_steps;
-	u16 current_step;
-	u16 nb_sockets;
-	bool fail;
-	bool set_ring;
-	bool adjust_tail;
-	bool adjust_tail_support;
-	enum test_mode mode;
-	char name[MAX_TEST_NAME_SIZE];
-};
-
-pthread_barrier_t barr;
-pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-int pkts_in_flight;
-
-static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
-
 #endif				/* XSKXCEIVER_H_ */
diff --git a/tools/testing/selftests/cachestat/.gitignore b/tools/testing/selftests/cachestat/.gitignore
index d6c30b43a4bb..abbb13b6e96b 100644
--- a/tools/testing/selftests/cachestat/.gitignore
+++ b/tools/testing/selftests/cachestat/.gitignore
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 test_cachestat
+tmpshmcstat
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index c952640f163b..ab838bcb9ec5 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -226,7 +226,7 @@ bool run_cachestat_test(enum file_type type)
 	int syscall_ret;
 	size_t compute_len = PS * 512;
 	struct cachestat_range cs_range = { PS, compute_len };
-	char *filename = "tmpshmcstat";
+	char *filename = "tmpshmcstat", *map;
 	struct cachestat cs;
 	bool ret = true;
 	int fd;
@@ -257,7 +257,7 @@ bool run_cachestat_test(enum file_type type)
 		}
 		break;
 	case FILE_MMAP:
-		char *map = mmap(NULL, filesize, PROT_READ | PROT_WRITE,
+		map = mmap(NULL, filesize, PROT_READ | PROT_WRITE,
 				 MAP_SHARED, fd, 0);
 
 		if (map == MAP_FAILED) {
diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
index 9dc90a1b386d..7ab2824ed7b5 100644
--- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -25,6 +25,26 @@ static inline int values_close(long a, long b, int err)
 	return labs(a - b) <= (a + b) / 100 * err;
 }
 
+/*
+ * Returns non-zero if |a - b| is within err% of (a + b); otherwise prints the
+ * values, diff, limit and actual error percentage to stderr and returns 0.
+ */
+static inline int values_close_report(long a, long b, int err)
+{
+	long diff  = labs(a - b);
+	long limit = (a + b) / 100 * err;	/* (a + b) / 100 is integer division */
+	double actual_err = (a + b) ? (100.0 * diff / (a + b)) : 0.0;
+	int close = diff <= limit;
+
+	if (!close)
+		fprintf(stderr,
+			"[FAIL] actual=%ld expected=%ld | diff=%ld | limit=%ld | "
+			"tolerance=%d%% | actual_error=%.2f%%\n",
+			a, b, diff, limit, err, actual_err);
+
+	return close;
+}
+
 extern ssize_t read_text(const char *path, char *buf, size_t max_len);
 extern ssize_t write_text(const char *path, char *buf, ssize_t len);
 
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index a360e2eb2eef..1d778c8b7764 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -923,8 +923,10 @@ struct corecg_test {
 int main(int argc, char *argv[])
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;
 
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) {
 		if (setup_named_v1_root(root, sizeof(root), CG_NAMED_NAME))
 			ksft_exit_skip("cgroup v2 isn't mounted and could not setup named v1 hierarchy\n");
@@ -946,12 +948,11 @@ post_v2_setup:
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}
 
 	cleanup_named_v1_root(root);
-	return ret;
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index 2a60e6c41940..b1b30e82dd7c 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -219,7 +219,7 @@ static int test_cpucg_stats(const char *root)
 	if (user_usec <= 0)
 		goto cleanup;
 
-	if (!values_close(usage_usec, expected_usage_usec, 1))
+	if (!values_close_report(usage_usec, expected_usage_usec, 1))
 		goto cleanup;
 
 	ret = KSFT_PASS;
@@ -291,7 +291,7 @@ static int test_cpucg_nice(const char *root)
 
 		user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
 		nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec");
-		if (!values_close(nice_usec, expected_nice_usec, 1))
+		if (!values_close_report(nice_usec, expected_nice_usec, 1))
 			goto cleanup;
 
 		ret = KSFT_PASS;
@@ -404,7 +404,7 @@ overprovision_validate(const struct cpu_hogger *children, int num_children)
 			goto cleanup;
 
 		delta = children[i + 1].usage - children[i].usage;
-		if (!values_close(delta, children[0].usage, 35))
+		if (!values_close_report(delta, children[0].usage, 35))
 			goto cleanup;
 	}
 
@@ -444,7 +444,7 @@ underprovision_validate(const struct cpu_hogger *children, int num_children)
 	int ret = KSFT_FAIL, i;
 
 	for (i = 0; i < num_children - 1; i++) {
-		if (!values_close(children[i + 1].usage, children[0].usage, 15))
+		if (!values_close_report(children[i + 1].usage, children[0].usage, 15))
 			goto cleanup;
 	}
 
@@ -573,16 +573,16 @@ run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
 
 	nested_leaf_usage = leaf[1].usage + leaf[2].usage;
 	if (overprovisioned) {
-		if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
+		if (!values_close_report(leaf[0].usage, nested_leaf_usage, 15))
 			goto cleanup;
-	} else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
+	} else if (!values_close_report(leaf[0].usage * 2, nested_leaf_usage, 15))
 		goto cleanup;
 
 
 	child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
 	if (child_usage <= 0)
 		goto cleanup;
-	if (!values_close(child_usage, nested_leaf_usage, 1))
+	if (!values_close_report(child_usage, nested_leaf_usage, 1))
 		goto cleanup;
 
 	ret = KSFT_PASS;
@@ -691,7 +691,7 @@ static int test_cpucg_max(const char *root)
 	expected_usage_usec
 		= n_periods * quota_usec + MIN(remainder_usec, quota_usec);
 
-	if (!values_close(usage_usec, expected_usage_usec, 10))
+	if (!values_close_report(usage_usec, expected_usage_usec, 10))
 		goto cleanup;
 
 	ret = KSFT_PASS;
@@ -762,7 +762,7 @@ static int test_cpucg_max_nested(const char *root)
 	expected_usage_usec
 		= n_periods * quota_usec + MIN(remainder_usec, quota_usec);
 
-	if (!values_close(usage_usec, expected_usage_usec, 10))
+	if (!values_close_report(usage_usec, expected_usage_usec, 10))
 		goto cleanup;
 
 	ret = KSFT_PASS;
@@ -796,8 +796,10 @@ struct cpucg_test {
 int main(int argc, char *argv[])
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;
 
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");
 
@@ -814,11 +816,10 @@ int main(int argc, char *argv[])
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}
 
-	return ret;
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/cgroup/test_cpuset.c b/tools/testing/selftests/cgroup/test_cpuset.c
index 4034d14ba69a..8086d2ea394f 100644
--- a/tools/testing/selftests/cgroup/test_cpuset.c
+++ b/tools/testing/selftests/cgroup/test_cpuset.c
@@ -247,8 +247,10 @@ struct cpuset_test {
 int main(int argc, char *argv[])
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;
 
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");
 
@@ -265,11 +267,10 @@ int main(int argc, char *argv[])
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}
 
-	return ret;
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
index dfb763819581..465cdad2bfca 100644
--- a/tools/testing/selftests/cgroup/test_freezer.c
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -1488,8 +1488,10 @@ struct cgfreezer_test {
 int main(int argc, char *argv[])
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;
 
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
@@ -1501,11 +1503,10 @@ int main(int argc, char *argv[])
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}
 
-	return ret;
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c
index 0e5bb6c7307a..ed590b150a17 100644
--- a/tools/testing/selftests/cgroup/test_kill.c
+++ b/tools/testing/selftests/cgroup/test_kill.c
@@ -274,8 +274,10 @@ struct cgkill_test {
 int main(int argc, char *argv[])
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;
 
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
@@ -287,11 +289,10 @@ int main(int argc, char *argv[])
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}
 
-	return ret;
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 63b3c9aad399..d4c4a514ee43 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -421,8 +421,10 @@ struct kmem_test {
 int main(int argc, char **argv)
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;
 
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");
 
@@ -446,11 +448,10 @@ int main(int argc, char **argv)
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}
 
-	return ret;
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index a680f773f2d5..b117325c0439 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -1650,8 +1650,10 @@ struct memcg_test {
 int main(int argc, char **argv)
 {
 	char root[PATH_MAX];
-	int i, proc_status, ret = EXIT_SUCCESS;
+	int i, proc_status;
 
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");
 
@@ -1685,11 +1687,10 @@ int main(int argc, char **argv)
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}
 
-	return ret;
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
index e1f578ca2841..86a8930b47e3 100644
--- a/tools/testing/selftests/cgroup/test_zswap.c
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -597,8 +597,10 @@ static bool zswap_configured(void)
 int main(int argc, char **argv)
 {
 	char root[PATH_MAX];
-	int i, ret = EXIT_SUCCESS;
+	int i;
 
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
 	if (cg_find_unified_root(root, sizeof(root), NULL))
 		ksft_exit_skip("cgroup v2 isn't mounted\n");
 
@@ -625,11 +627,10 @@ int main(int argc, char **argv)
 			ksft_test_result_skip("%s\n", tests[i].name);
 			break;
 		default:
-			ret = EXIT_FAILURE;
 			ksft_test_result_fail("%s\n", tests[i].name);
 			break;
 		}
 	}
 
-	return ret;
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/coredump/.gitignore b/tools/testing/selftests/coredump/.gitignore
new file mode 100644
index 000000000000..097f52db0be9
--- /dev/null
+++ b/tools/testing/selftests/coredump/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+stackdump_test
+coredump_socket_test
+coredump_socket_protocol_test
diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile
index 77b3665c73c7..dece1a31d561 100644
--- a/tools/testing/selftests/coredump/Makefile
+++ b/tools/testing/selftests/coredump/Makefile
@@ -1,7 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
 CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
 
-TEST_GEN_PROGS := stackdump_test
+TEST_GEN_PROGS := stackdump_test \
+		  coredump_socket_test \
+		  coredump_socket_protocol_test
 TEST_FILES := stackdump
 
 include ../lib.mk
+
+$(OUTPUT)/stackdump_test: coredump_test_helpers.c
+$(OUTPUT)/coredump_socket_test: coredump_test_helpers.c
+$(OUTPUT)/coredump_socket_protocol_test: coredump_test_helpers.c
diff --git a/tools/testing/selftests/coredump/coredump_socket_protocol_test.c b/tools/testing/selftests/coredump/coredump_socket_protocol_test.c
new file mode 100644
index 000000000000..d19b6717c53e
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_socket_protocol_test.c
@@ -0,0 +1,1568 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/stat.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "coredump_test.h"
+
+#define NUM_CRASHING_COREDUMPS 5
+
+FIXTURE_SETUP(coredump)	/* save core_pattern and open the detached tmpfs */
+{
+	FILE *file;
+	int ret;
+
+	self->pid_coredump_server = -ESRCH;	/* no server child recorded yet */
+	self->fd_tmpfs_detached = -1;	/* not opened yet */
+	file = fopen("/proc/sys/kernel/core_pattern", "r");
+	ASSERT_NE(NULL, file);
+
+	ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file);
+	ASSERT_TRUE(ret || feof(file));	/* a zero-byte read is only accepted at EOF */
+	ASSERT_LT(ret, sizeof(self->original_core_pattern));	/* keep room for the NUL */
+
+	self->original_core_pattern[ret] = '\0';
+	self->fd_tmpfs_detached = create_detached_tmpfs();
+	ASSERT_GE(self->fd_tmpfs_detached, 0);
+
+	ret = fclose(file);
+	ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(coredump)	/* stop server, remove temp files, restore core_pattern */
+{
+	const char *reason;
+	FILE *file;
+	int ret, status;
+
+	if (self->pid_coredump_server > 0) {	/* a test recorded a forked server */
+		kill(self->pid_coredump_server, SIGTERM);
+		waitpid(self->pid_coredump_server, &status, 0);
+	}
+	unlink("/tmp/coredump.file");
+	unlink("/tmp/coredump.socket");
+
+	file = fopen("/proc/sys/kernel/core_pattern", "w");
+	if (!file) {
+		reason = "Unable to open core_pattern";
+		goto fail;
+	}
+
+	ret = fprintf(file, "%s", self->original_core_pattern);	/* value read in setup */
+	if (ret < 0) {
+		reason = "Unable to write to core_pattern";
+		goto fail;
+	}
+
+	ret = fclose(file);
+	if (ret) {
+		reason = "Unable to close core_pattern";
+		goto fail;
+	}
+
+	if (self->fd_tmpfs_detached >= 0) {
+		ret = close(self->fd_tmpfs_detached);
+		if (ret < 0) {
+			reason = "Unable to close detached tmpfs";
+			goto fail;
+		}
+		self->fd_tmpfs_detached = -1;
+	}
+
+	return;
+fail:	/* NOTE(review): core_pattern failures land here with the tmpfs fd still open */
+	/* This should never happen */
+	fprintf(stderr, "Failed to cleanup coredump test: %s\n", reason);
+}
+
+TEST_F(coredump, socket_request_kernel)	/* ack COREDUMP_KERNEL, copy socket data to a file */
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct stat st;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {	/* child: acts as the coredump server */
+		struct coredump_req req = {};
+		int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_request_kernel: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		/* one byte over the socketpair releases the parent's read_nointr() */
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_request_kernel: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_request_kernel: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_request_kernel: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_request_kernel: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_request_kernel: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket_request_kernel: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+
+		fd_core_file = creat("/tmp/coredump.file", 0644);
+		if (fd_core_file < 0) {
+			fprintf(stderr, "socket_request_kernel: creat coredump file failed: %m\n");
+			goto out;
+		}
+
+		if (!read_coredump_req(fd_coredump, &req)) {
+			fprintf(stderr, "socket_request_kernel: read_coredump_req failed\n");
+			goto out;
+		}
+
+		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+					COREDUMP_KERNEL | COREDUMP_USERSPACE |
+					COREDUMP_REJECT | COREDUMP_WAIT)) {
+			fprintf(stderr, "socket_request_kernel: check_coredump_req failed\n");
+			goto out;
+		}
+
+		if (!send_coredump_ack(fd_coredump, &req,
+				       COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+			fprintf(stderr, "socket_request_kernel: send_coredump_ack failed\n");
+			goto out;
+		}
+
+		if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+			fprintf(stderr, "socket_request_kernel: read_marker COREDUMP_MARK_REQACK failed\n");
+			goto out;
+		}
+
+		for (;;) {	/* copy everything read from the socket into the core file */
+			char buffer[4096];
+			ssize_t bytes_read, bytes_write;
+
+			bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+			if (bytes_read < 0) {
+				fprintf(stderr, "socket_request_kernel: read from coredump socket failed: %m\n");
+				goto out;
+			}
+
+			if (bytes_read == 0)	/* end of stream */
+				break;
+
+			bytes_write = write(fd_core_file, buffer, bytes_read);
+			if (bytes_read != bytes_write) {
+				if (bytes_write < 0 && errno == ENOSPC)
+					continue;	/* drop this chunk, keep reading the socket */
+				fprintf(stderr, "socket_request_kernel: write to core file failed (read=%zd, write=%zd): %m\n",
+					bytes_read, bytes_write);
+				goto out;
+			}
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_request_kernel: completed successfully\n");
+out:
+		if (fd_core_file >= 0)
+			close(fd_core_file);
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;	/* teardown kills and reaps it */
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);	/* block until the server's byte arrives */
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_TRUE(WCOREDUMP(status));
+
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+	ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+	ASSERT_GT(st.st_size, 0);	/* the server must have written some data */
+	system("file /tmp/coredump.file");	/* informational; return value is not checked */
+}
+
+TEST_F(coredump, socket_request_userspace)	/* ack COREDUMP_USERSPACE, expect no data on socket */
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {	/* child: acts as the coredump server */
+		struct coredump_req req = {};
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_request_userspace: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		/* one byte over the socketpair releases the parent's read_nointr() */
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_request_userspace: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_request_userspace: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_request_userspace: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_request_userspace: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_request_userspace: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket_request_userspace: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+
+		if (!read_coredump_req(fd_coredump, &req)) {
+			fprintf(stderr, "socket_request_userspace: read_coredump_req failed\n");
+			goto out;
+		}
+
+		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+					COREDUMP_KERNEL | COREDUMP_USERSPACE |
+					COREDUMP_REJECT | COREDUMP_WAIT)) {
+			fprintf(stderr, "socket_request_userspace: check_coredump_req failed\n");
+			goto out;
+		}
+
+		if (!send_coredump_ack(fd_coredump, &req,
+				       COREDUMP_USERSPACE | COREDUMP_WAIT, 0)) {
+			fprintf(stderr, "socket_request_userspace: send_coredump_ack failed\n");
+			goto out;
+		}
+
+		if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+			fprintf(stderr, "socket_request_userspace: read_marker COREDUMP_MARK_REQACK failed\n");
+			goto out;
+		}
+
+		for (;;) {	/* every branch below leaves the loop, so this runs once */
+			char buffer[4096];
+			ssize_t bytes_read;
+
+			bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+			if (bytes_read > 0) {	/* any payload makes the server exit with failure */
+				fprintf(stderr, "socket_request_userspace: unexpected data received (expected no coredump data)\n");
+				goto out;
+			}
+
+			if (bytes_read < 0) {
+				fprintf(stderr, "socket_request_userspace: read from coredump socket failed: %m\n");
+				goto out;
+			}
+
+			if (bytes_read == 0)	/* end of stream with no data: the success path */
+				break;
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_request_userspace: completed successfully\n");
+out:
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;	/* teardown kills and reaps it */
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);	/* block until the server's byte arrives */
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_TRUE(WCOREDUMP(status));
+
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: server answers the coredump request with COREDUMP_REJECT
+ *
+ * A forked server listens on /tmp/coredump.socket, validates the request
+ * it receives for the crashing task and acks it with
+ * COREDUMP_REJECT | COREDUMP_WAIT. It then expects COREDUMP_MARK_REQACK
+ * followed by end-of-file without any coredump data. The parent expects
+ * the crashed child to be signaled without WCOREDUMP while its pidfd
+ * still reports PIDFD_COREDUMPED.
+ */
+TEST_F(coredump, socket_request_reject)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+	/* Used by the server to tell the parent that it is listening. */
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		struct coredump_req req = {};
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_request_reject: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		/* The parent blocks on this byte before it forks the crasher. */
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_request_reject: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_request_reject: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_request_reject: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_request_reject: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_request_reject: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket_request_reject: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+
+		if (!read_coredump_req(fd_coredump, &req)) {
+			fprintf(stderr, "socket_request_reject: read_coredump_req failed\n");
+			goto out;
+		}
+
+		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+					COREDUMP_KERNEL | COREDUMP_USERSPACE |
+					COREDUMP_REJECT | COREDUMP_WAIT)) {
+			fprintf(stderr, "socket_request_reject: check_coredump_req failed\n");
+			goto out;
+		}
+
+		/* The behaviour under test: refuse the coredump. */
+		if (!send_coredump_ack(fd_coredump, &req,
+				       COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+			fprintf(stderr, "socket_request_reject: send_coredump_ack failed\n");
+			goto out;
+		}
+
+		if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+			fprintf(stderr, "socket_request_reject: read_marker COREDUMP_MARK_REQACK failed\n");
+			goto out;
+		}
+
+		/* Only end-of-file may follow the marker; any payload is a failure. */
+		for (;;) {
+			char buffer[4096];
+			ssize_t bytes_read;
+
+			bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+			if (bytes_read > 0) {
+				fprintf(stderr, "socket_request_reject: unexpected data received (expected no coredump data for REJECT)\n");
+				goto out;
+			}
+
+			if (bytes_read < 0) {
+				fprintf(stderr, "socket_request_reject: read from coredump socket failed: %m\n");
+				goto out;
+			}
+
+			if (bytes_read == 0)
+				break;
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_request_reject: completed successfully\n");
+out:
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	/* status is only valid when the child was actually reaped. */
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_FALSE(WCOREDUMP(status));
+
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+	EXPECT_EQ(close(pidfd), 0);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: server acks with the conflicting flags KERNEL and REJECT
+ *
+ * A forked server listens on /tmp/coredump.socket, validates the request
+ * it receives for the crashing task and acks it with
+ * COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT. It then expects to
+ * read COREDUMP_MARK_CONFLICTING from the socket. The parent expects the
+ * crashed child to be signaled without WCOREDUMP while its pidfd still
+ * reports PIDFD_COREDUMPED.
+ */
+TEST_F(coredump, socket_request_invalid_flag_combination)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+	/* Used by the server to tell the parent that it is listening. */
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		struct coredump_req req = {};
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_request_invalid_flag_combination: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		/* The parent blocks on this byte before it forks the crasher. */
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_request_invalid_flag_combination: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_request_invalid_flag_combination: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_request_invalid_flag_combination: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_request_invalid_flag_combination: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_request_invalid_flag_combination: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket_request_invalid_flag_combination: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+
+		if (!read_coredump_req(fd_coredump, &req)) {
+			fprintf(stderr, "socket_request_invalid_flag_combination: read_coredump_req failed\n");
+			goto out;
+		}
+
+		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+					COREDUMP_KERNEL | COREDUMP_USERSPACE |
+					COREDUMP_REJECT | COREDUMP_WAIT)) {
+			fprintf(stderr, "socket_request_invalid_flag_combination: check_coredump_req failed\n");
+			goto out;
+		}
+
+		/* The behaviour under test: KERNEL and REJECT in the same ack. */
+		if (!send_coredump_ack(fd_coredump, &req,
+				       COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+			fprintf(stderr, "socket_request_invalid_flag_combination: send_coredump_ack failed\n");
+			goto out;
+		}
+
+		if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING)) {
+			fprintf(stderr, "socket_request_invalid_flag_combination: read_marker COREDUMP_MARK_CONFLICTING failed\n");
+			goto out;
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_request_invalid_flag_combination: completed successfully\n");
+out:
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	/* status is only valid when the child was actually reaped. */
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_FALSE(WCOREDUMP(status));
+
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+	EXPECT_EQ(close(pidfd), 0);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: server acks with a flag bit outside the advertised set
+ *
+ * A forked server listens on /tmp/coredump.socket, validates the request
+ * it receives for the crashing task and acks it with only bit 63 set,
+ * which is none of KERNEL, USERSPACE, REJECT or WAIT. It then expects to
+ * read COREDUMP_MARK_UNSUPPORTED from the socket. The parent expects the
+ * crashed child to be signaled without WCOREDUMP while its pidfd still
+ * reports PIDFD_COREDUMPED.
+ */
+TEST_F(coredump, socket_request_unknown_flag)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+	/* Used by the server to tell the parent that it is listening. */
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		struct coredump_req req = {};
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_request_unknown_flag: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		/* The parent blocks on this byte before it forks the crasher. */
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_request_unknown_flag: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_request_unknown_flag: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_request_unknown_flag: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_request_unknown_flag: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_request_unknown_flag: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket_request_unknown_flag: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+
+		if (!read_coredump_req(fd_coredump, &req)) {
+			fprintf(stderr, "socket_request_unknown_flag: read_coredump_req failed\n");
+			goto out;
+		}
+
+		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+					COREDUMP_KERNEL | COREDUMP_USERSPACE |
+					COREDUMP_REJECT | COREDUMP_WAIT)) {
+			fprintf(stderr, "socket_request_unknown_flag: check_coredump_req failed\n");
+			goto out;
+		}
+
+		/* The behaviour under test: a flag value the request did not offer. */
+		if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0)) {
+			fprintf(stderr, "socket_request_unknown_flag: send_coredump_ack failed\n");
+			goto out;
+		}
+
+		if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED)) {
+			fprintf(stderr, "socket_request_unknown_flag: read_marker COREDUMP_MARK_UNSUPPORTED failed\n");
+			goto out;
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_request_unknown_flag: completed successfully\n");
+out:
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	/* status is only valid when the child was actually reaped. */
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_FALSE(WCOREDUMP(status));
+
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+	EXPECT_EQ(close(pidfd), 0);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: server acks with a size below COREDUMP_ACK_SIZE_VER0
+ *
+ * A forked server listens on /tmp/coredump.socket, validates the request
+ * it receives for the crashing task and acks it with
+ * COREDUMP_REJECT | COREDUMP_WAIT, passing COREDUMP_ACK_SIZE_VER0 / 2 as
+ * the last argument of send_coredump_ack(). It then expects to read
+ * COREDUMP_MARK_MINSIZE from the socket. The parent expects the crashed
+ * child to be signaled without WCOREDUMP while its pidfd still reports
+ * PIDFD_COREDUMPED.
+ */
+TEST_F(coredump, socket_request_invalid_size_small)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+	/* Used by the server to tell the parent that it is listening. */
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		struct coredump_req req = {};
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_request_invalid_size_small: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		/* The parent blocks on this byte before it forks the crasher. */
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_request_invalid_size_small: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_request_invalid_size_small: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_request_invalid_size_small: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_request_invalid_size_small: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_request_invalid_size_small: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket_request_invalid_size_small: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+
+		if (!read_coredump_req(fd_coredump, &req)) {
+			fprintf(stderr, "socket_request_invalid_size_small: read_coredump_req failed\n");
+			goto out;
+		}
+
+		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+					COREDUMP_KERNEL | COREDUMP_USERSPACE |
+					COREDUMP_REJECT | COREDUMP_WAIT)) {
+			fprintf(stderr, "socket_request_invalid_size_small: check_coredump_req failed\n");
+			goto out;
+		}
+
+		/*
+		 * The behaviour under test: half of COREDUMP_ACK_SIZE_VER0.
+		 * NOTE(review): the last argument is presumably the ack size
+		 * to advertise - confirm against send_coredump_ack().
+		 */
+		if (!send_coredump_ack(fd_coredump, &req,
+				       COREDUMP_REJECT | COREDUMP_WAIT,
+				       COREDUMP_ACK_SIZE_VER0 / 2)) {
+			fprintf(stderr, "socket_request_invalid_size_small: send_coredump_ack failed\n");
+			goto out;
+		}
+
+		if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE)) {
+			fprintf(stderr, "socket_request_invalid_size_small: read_marker COREDUMP_MARK_MINSIZE failed\n");
+			goto out;
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_request_invalid_size_small: completed successfully\n");
+out:
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	/* status is only valid when the child was actually reaped. */
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_FALSE(WCOREDUMP(status));
+
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+	EXPECT_EQ(close(pidfd), 0);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: server acks with a size far above COREDUMP_ACK_SIZE_VER0
+ *
+ * A forked server listens on /tmp/coredump.socket, validates the request
+ * it receives for the crashing task and acks it with
+ * COREDUMP_REJECT | COREDUMP_WAIT, passing
+ * COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE as the last argument of
+ * send_coredump_ack(). It then expects to read COREDUMP_MARK_MAXSIZE from
+ * the socket. The parent expects the crashed child to be signaled without
+ * WCOREDUMP while its pidfd still reports PIDFD_COREDUMPED.
+ */
+TEST_F(coredump, socket_request_invalid_size_large)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+	/* Used by the server to tell the parent that it is listening. */
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		struct coredump_req req = {};
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_request_invalid_size_large: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		/* The parent blocks on this byte before it forks the crasher. */
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_request_invalid_size_large: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_request_invalid_size_large: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_request_invalid_size_large: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_request_invalid_size_large: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_request_invalid_size_large: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket_request_invalid_size_large: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+
+		if (!read_coredump_req(fd_coredump, &req)) {
+			fprintf(stderr, "socket_request_invalid_size_large: read_coredump_req failed\n");
+			goto out;
+		}
+
+		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+					COREDUMP_KERNEL | COREDUMP_USERSPACE |
+					COREDUMP_REJECT | COREDUMP_WAIT)) {
+			fprintf(stderr, "socket_request_invalid_size_large: check_coredump_req failed\n");
+			goto out;
+		}
+
+		/*
+		 * The behaviour under test: one page more than
+		 * COREDUMP_ACK_SIZE_VER0.
+		 * NOTE(review): the last argument is presumably the ack size
+		 * to advertise - confirm against send_coredump_ack().
+		 */
+		if (!send_coredump_ack(fd_coredump, &req,
+				       COREDUMP_REJECT | COREDUMP_WAIT,
+				       COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE)) {
+			fprintf(stderr, "socket_request_invalid_size_large: send_coredump_ack failed\n");
+			goto out;
+		}
+
+		if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE)) {
+			fprintf(stderr, "socket_request_invalid_size_large: read_marker COREDUMP_MARK_MAXSIZE failed\n");
+			goto out;
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_request_invalid_size_large: completed successfully\n");
+out:
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	/* status is only valid when the child was actually reaped. */
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_FALSE(WCOREDUMP(status));
+
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+	EXPECT_EQ(close(pidfd), 0);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via socket coredump with SIGSEGV
+ *
+ * Verify that when using socket-based coredump protocol,
+ * the coredump_signal field is correctly exposed as SIGSEGV.
+ *
+ * The field is checked twice: by the forked server on the peer pidfd of
+ * the coredump connection, and by the parent on its own pidfd after the
+ * child has been reaped. The server acks the request with
+ * COREDUMP_REJECT | COREDUMP_WAIT and expects COREDUMP_MARK_REQACK.
+ */
+TEST_F(coredump, socket_coredump_signal_sigsegv)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+	/* Used by the server to tell the parent that it is listening. */
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		struct coredump_req req = {};
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		/* The parent blocks on this byte before it forks the crasher. */
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+
+		/* Verify coredump_signal is available and correct */
+		if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+			goto out;
+		}
+
+		if (info.coredump_signal != SIGSEGV) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_signal=%d, expected SIGSEGV=%d\n",
+				info.coredump_signal, SIGSEGV);
+			goto out;
+		}
+
+		if (!read_coredump_req(fd_coredump, &req)) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: read_coredump_req failed\n");
+			goto out;
+		}
+
+		/* No dump is needed for this test, so refuse it. */
+		if (!send_coredump_ack(fd_coredump, &req,
+				       COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: send_coredump_ack failed\n");
+			goto out;
+		}
+
+		if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: read_marker COREDUMP_MARK_REQACK failed\n");
+			goto out;
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_coredump_signal_sigsegv: completed successfully\n");
+out:
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	/* status is only valid when the child was actually reaped. */
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(WTERMSIG(status), SIGSEGV);
+
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+	ASSERT_EQ(info.coredump_signal, SIGSEGV);
+	EXPECT_EQ(close(pidfd), 0);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via socket coredump with SIGABRT
+ *
+ * Verify that when using socket-based coredump protocol,
+ * the coredump_signal field is correctly exposed as SIGABRT.
+ *
+ * The child terminates itself with abort(). The field is checked twice:
+ * by the forked server on the peer pidfd of the coredump connection, and
+ * by the parent on its own pidfd after the child has been reaped. The
+ * server acks the request with COREDUMP_REJECT | COREDUMP_WAIT and
+ * expects COREDUMP_MARK_REQACK.
+ */
+TEST_F(coredump, socket_coredump_signal_sigabrt)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+	/* Used by the server to tell the parent that it is listening. */
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		struct coredump_req req = {};
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		/* The parent blocks on this byte before it forks the crasher. */
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+
+		/* Verify coredump_signal is available and correct */
+		if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+			goto out;
+		}
+
+		if (info.coredump_signal != SIGABRT) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_signal=%d, expected SIGABRT=%d\n",
+				info.coredump_signal, SIGABRT);
+			goto out;
+		}
+
+		if (!read_coredump_req(fd_coredump, &req)) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: read_coredump_req failed\n");
+			goto out;
+		}
+
+		/* No dump is needed for this test, so refuse it. */
+		if (!send_coredump_ack(fd_coredump, &req,
+				       COREDUMP_REJECT | COREDUMP_WAIT, 0)) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: send_coredump_ack failed\n");
+			goto out;
+		}
+
+		if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: read_marker COREDUMP_MARK_REQACK failed\n");
+			goto out;
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_coredump_signal_sigabrt: completed successfully\n");
+out:
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		abort();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	/* status is only valid when the child was actually reaped. */
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(WTERMSIG(status), SIGABRT);
+
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+	ASSERT_EQ(info.coredump_signal, SIGABRT);
+	EXPECT_EQ(close(pidfd), 0);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: one server handles NUM_CRASHING_COREDUMPS connections in sequence
+ *
+ * For every accepted connection the forked server validates the request,
+ * acks it with COREDUMP_KERNEL | COREDUMP_WAIT, expects
+ * COREDUMP_MARK_REQACK and then copies the socket stream into a file
+ * obtained from open_coredump_tmpfile() until end-of-file. The parent
+ * forks NUM_CRASHING_COREDUMPS crashing children and expects each of them
+ * to be reported with WCOREDUMP and PIDFD_COREDUMPED.
+ */
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500)
+{
+	int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+	pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+
+	/* Used by the server to tell the parent that it is listening. */
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+		int exit_code = EXIT_FAILURE;
+		struct coredump_req req = {};
+
+		close(ipc_sockets[0]);
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "Failed to create and listen on unix socket\n");
+			goto out;
+		}
+
+		/* The parent blocks on this byte before it forks the crashers. */
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "Failed to notify parent via ipc socket\n");
+			goto out;
+		}
+		close(ipc_sockets[1]);
+
+		for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+			fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+			if (fd_coredump < 0) {
+				fprintf(stderr, "accept4 failed: %m\n");
+				goto out;
+			}
+
+			fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+			if (fd_peer_pidfd < 0) {
+				fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump);
+				goto out;
+			}
+
+			if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+				fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd);
+				goto out;
+			}
+
+			if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+				fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd);
+				goto out;
+			}
+			if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+				fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd);
+				goto out;
+			}
+
+			if (!read_coredump_req(fd_coredump, &req)) {
+				fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump);
+				goto out;
+			}
+
+			if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+						COREDUMP_KERNEL | COREDUMP_USERSPACE |
+						COREDUMP_REJECT | COREDUMP_WAIT)) {
+				fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump);
+				goto out;
+			}
+
+			if (!send_coredump_ack(fd_coredump, &req,
+					       COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+				fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump);
+				goto out;
+			}
+
+			if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+				fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump);
+				goto out;
+			}
+
+			fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+			if (fd_core_file < 0) {
+				fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump);
+				goto out;
+			}
+
+			/* Copy the stream into the file until the sender closes it. */
+			for (;;) {
+				char buffer[4096];
+				ssize_t bytes_read, bytes_write;
+
+				bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+				if (bytes_read < 0) {
+					fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump);
+					goto out;
+				}
+
+				if (bytes_read == 0)
+					break;
+
+				bytes_write = write(fd_core_file, buffer, bytes_read);
+				if (bytes_read != bytes_write) {
+					/* Out of space: drop this chunk but keep draining the socket. */
+					if (bytes_write < 0 && errno == ENOSPC)
+						continue;
+					fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file);
+					goto out;
+				}
+			}
+
+			close(fd_core_file);
+			close(fd_peer_pidfd);
+			close(fd_coredump);
+			/*
+			 * Forget all three descriptors so that a failure in a
+			 * later iteration does not close them a second time at
+			 * the "out" label.
+			 */
+			fd_core_file = -1;
+			fd_peer_pidfd = -1;
+			fd_coredump = -1;
+		}
+
+		exit_code = EXIT_SUCCESS;
+out:
+		if (fd_core_file >= 0)
+			close(fd_core_file);
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+		pid[i] = fork();
+		ASSERT_GE(pid[i], 0);
+		if (pid[i] == 0)
+			crashing_child();
+		pidfd[i] = sys_pidfd_open(pid[i], 0);
+		ASSERT_GE(pidfd[i], 0);
+	}
+
+	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+		/* status[i] is only valid when the child was actually reaped. */
+		ASSERT_EQ(waitpid(pid[i], &status[i], 0), pid[i]);
+		ASSERT_TRUE(WIFSIGNALED(status[i]));
+		ASSERT_TRUE(WCOREDUMP(status[i]));
+	}
+
+	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+		info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+		ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+		ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+		ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+		EXPECT_EQ(close(pidfd[i]), 0);
+	}
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500)
+{
+	int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
+	pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS];
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	/* Crash NUM_CRASHING_COREDUMPS children; the server child handshakes on each
+	 * connection and forks one process_coredump_worker() child per connection. */
+	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0;
+
+		close(ipc_sockets[0]);
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+		close(ipc_sockets[1]);
+
+		while (n_conns < NUM_CRASHING_COREDUMPS) {
+			int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+			struct coredump_req req = {};
+			fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+			if (fd_coredump < 0) {
+				if (errno == EAGAIN || errno == EWOULDBLOCK)
+					continue;
+				fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: accept4 failed: %m\n");
+				goto out;
+			}
+			fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+			if (fd_peer_pidfd < 0) {
+				fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: get_peer_pidfd failed\n");
+				goto out;
+			}
+			if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+				fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: get_pidfd_info failed\n");
+				goto out;
+			}
+			if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED)) {
+				fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: missing PIDFD_INFO_COREDUMP or PIDFD_COREDUMPED\n");
+				goto out;
+			}
+			if (!read_coredump_req(fd_coredump, &req)) {
+				fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: read_coredump_req failed\n");
+				goto out;
+			}
+			if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
+						COREDUMP_KERNEL | COREDUMP_USERSPACE |
+						COREDUMP_REJECT | COREDUMP_WAIT)) {
+				fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: check_coredump_req failed\n");
+				goto out;
+			}
+			if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
+				fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: send_coredump_ack failed\n");
+				goto out;
+			}
+			if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
+				fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: read_marker failed\n");
+				goto out;
+			}
+			fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+			if (fd_core_file < 0) {
+				fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: open_coredump_tmpfile failed: %m\n");
+				goto out;
+			}
+			pid_t worker = fork();
+			if (worker < 0) {	/* never record -1: waitpid(-1) would reap any child */
+				fprintf(stderr, "socket_multiple_crashing_coredumps_epoll_workers: fork failed: %m\n");
+				goto out;
+			}
+			if (worker == 0) {
+				close(fd_server);
+				process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file);
+			}
+			worker_pids[n_conns] = worker;
+			close(fd_coredump);	/* all three fds are valid here; the worker has its own copies */
+			close(fd_peer_pidfd);
+			close(fd_core_file);
+			n_conns++;
+		}
+		exit_code = EXIT_SUCCESS;
+out:
+		if (fd_server >= 0)
+			close(fd_server);
+		/* Reap every worker forked so far; any worker failure fails the server. */
+		for (int i = 0; i < n_conns; i++) {
+			int wstatus;
+			if (waitpid(worker_pids[i], &wstatus, 0) < 0) {
+				fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]);
+				exit_code = EXIT_FAILURE;
+			} else if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != EXIT_SUCCESS) {
+				fprintf(stderr, "Worker %d failed (wait status 0x%x)\n", worker_pids[i], wstatus);
+				exit_code = EXIT_FAILURE;
+			}
+		}
+
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+		pid[i] = fork();
+		ASSERT_GE(pid[i], 0);
+		if (pid[i] == 0)
+			crashing_child();
+		pidfd[i] = sys_pidfd_open(pid[i], 0);
+		ASSERT_GE(pidfd[i], 0);
+	}
+
+	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+		ASSERT_GE(waitpid(pid[i], &status[i], 0), 0);
+		ASSERT_TRUE(WIFSIGNALED(status[i]));
+		ASSERT_TRUE(WCOREDUMP(status[i]));
+	}
+
+	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
+		info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+		ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
+		ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+		ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+	}
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/coredump/coredump_socket_test.c b/tools/testing/selftests/coredump/coredump_socket_test.c
new file mode 100644
index 000000000000..7e26d4a6a15d
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_socket_test.c
@@ -0,0 +1,742 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/stat.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "coredump_test.h"
+
+FIXTURE_SETUP(coredump)
+{
+	FILE *file;
+	int ret;
+
+	self->pid_coredump_server = -ESRCH;	/* no server child to reap yet */
+	self->fd_tmpfs_detached = -1;
+	file = fopen("/proc/sys/kernel/core_pattern", "r");
+	ASSERT_NE(NULL, file);
+	/* Save the current core_pattern in the fixture data; FIXTURE_TEARDOWN writes it back. */
+	ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file);
+	ASSERT_TRUE(ret || feof(file));
+	ASSERT_LT(ret, sizeof(self->original_core_pattern));	/* leave room for the '\0' below */
+
+	self->original_core_pattern[ret] = '\0';
+	self->fd_tmpfs_detached = create_detached_tmpfs();
+	ASSERT_GE(self->fd_tmpfs_detached, 0);
+
+	ret = fclose(file);
+	ASSERT_EQ(0, ret);
+}
+
+/*
+ * Undo what FIXTURE_SETUP and the tests changed: reap a coredump server that
+ * is still recorded in the fixture, remove the files the tests create under
+ * /tmp, write the saved core_pattern back and close the detached tmpfs.
+ *
+ * Every step is attempted even if an earlier one failed; only the first
+ * failure is reported on stderr.
+ */
+FIXTURE_TEARDOWN(coredump)
+{
+	const char *reason = NULL;
+	FILE *file;
+	int status;
+
+	/* Server child that was forked but never reaped by the test. */
+	if (self->pid_coredump_server > 0) {
+		kill(self->pid_coredump_server, SIGTERM);
+		waitpid(self->pid_coredump_server, &status, 0);
+	}
+	unlink("/tmp/coredump.file");
+	unlink("/tmp/coredump.socket");
+
+	/* Restore the core_pattern saved by FIXTURE_SETUP. */
+	file = fopen("/proc/sys/kernel/core_pattern", "w");
+	if (!file) {
+		reason = "Unable to open core_pattern";
+	} else {
+		if (fprintf(file, "%s", self->original_core_pattern) < 0)
+			reason = "Unable to write to core_pattern";
+		/* Always fclose() so a failed write does not leak the stream. */
+		if (fclose(file) && !reason)
+			reason = "Unable to close core_pattern";
+	}
+
+	/* Release the tmpfs even if restoring core_pattern failed. */
+	if (self->fd_tmpfs_detached >= 0) {
+		if (close(self->fd_tmpfs_detached) < 0 && !reason)
+			reason = "Unable to close detached tmpfs";
+		self->fd_tmpfs_detached = -1;
+	}
+
+	/* This should never happen */
+	if (reason)
+		fprintf(stderr, "Failed to cleanup coredump test: %s\n", reason);
+}
+
+TEST_F(coredump, socket)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct stat st;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+	/* Crash a child with core_pattern "@/tmp/coredump.socket"; the server saves what arrives. */
+	ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+	/* The server child reports over this socketpair once it is listening. */
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket test: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket test: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+		/* Take the single connection this test expects and inspect its peer. */
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket test: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket test: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket test: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket test: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket test: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+		/* Copy everything received on the socket into /tmp/coredump.file. */
+		fd_core_file = creat("/tmp/coredump.file", 0644);
+		if (fd_core_file < 0) {
+			fprintf(stderr, "socket test: creat coredump file failed: %m\n");
+			goto out;
+		}
+
+		for (;;) {
+			char buffer[4096];
+			ssize_t bytes_read, bytes_write;
+
+			bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+			if (bytes_read < 0) {
+				fprintf(stderr, "socket test: read from coredump socket failed: %m\n");
+				goto out;
+			}
+
+			if (bytes_read == 0)
+				break;
+
+			bytes_write = write(fd_core_file, buffer, bytes_read);
+			if (bytes_read != bytes_write) {
+				if (bytes_write < 0 && errno == ENOSPC)
+					continue;	/* out of space: drop this chunk, keep draining the socket */
+				fprintf(stderr, "socket test: write to core file failed (read=%zd, write=%zd): %m\n", bytes_read, bytes_write);
+				goto out;
+			}
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket test: completed successfully\n");
+out:
+		if (fd_core_file >= 0)
+			close(fd_core_file);
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+	/* Block until the server has sent its one-byte "ready" message. */
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_TRUE(WCOREDUMP(status));
+
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+	/* The server must have left a non-empty core file behind. */
+	ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+	ASSERT_GT(st.st_size, 0);
+}
+
+TEST_F(coredump, socket_detect_userspace_client)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct stat st;
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_COREDUMP,
+	};
+	int ipc_sockets[2];
+	char c;
+	/* A process that merely connect()s to the coredump socket must not look like a coredump. */
+	ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_detect_userspace_client: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_detect_userspace_client: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_detect_userspace_client: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_detect_userspace_client: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_detect_userspace_client: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_detect_userspace_client: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+		/* The peer is a live process that called connect(), so the flag must be clear. */
+		if (info.coredump_mask & PIDFD_COREDUMPED) {
+			fprintf(stderr, "socket_detect_userspace_client: PIDFD_COREDUMPED incorrectly set (should be userspace client)\n");
+			goto out;
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_detect_userspace_client: completed successfully\n");
+out:
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+	/* This child connects to the coredump socket itself instead of crashing. */
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0) {
+		int fd_socket;
+		ssize_t ret;
+		const struct sockaddr_un coredump_sk = {
+			.sun_family = AF_UNIX,
+			.sun_path = "/tmp/coredump.socket",
+		};
+		size_t coredump_sk_len =
+			offsetof(struct sockaddr_un, sun_path) +
+			sizeof("/tmp/coredump.socket");
+
+		fd_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+		if (fd_socket < 0) {
+			fprintf(stderr, "socket_detect_userspace_client (client): socket failed: %m\n");
+			_exit(EXIT_FAILURE);
+		}
+
+		ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+		if (ret < 0) {
+			fprintf(stderr, "socket_detect_userspace_client (client): connect failed: %m\n");
+			_exit(EXIT_FAILURE);
+		}
+
+		close(fd_socket);
+		pause();	/* stay alive until the parent sends SIGKILL through the pidfd */
+		fprintf(stderr, "socket_detect_userspace_client (client): completed successfully\n");
+		_exit(EXIT_SUCCESS);
+	}
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+	ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+
+	ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
+	ASSERT_EQ(close(pidfd), 0);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(WTERMSIG(status), SIGKILL);
+	/* The server in this test never creates the core file, so it must not exist. */
+	ASSERT_NE(stat("/tmp/coredump.file", &st), 0);
+	ASSERT_EQ(errno, ENOENT);
+}
+
+TEST_F(coredump, socket_enoent)
+{
+	int pidfd, status;
+	pid_t pid;
+
+	ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+	/* This test starts no coredump server before crashing the child. */
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+	/* The child must be killed by the signal without the core-dumped flag set. */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_FALSE(WCOREDUMP(status));
+}
+
+TEST_F(coredump, socket_no_listener)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	int ipc_sockets[2];
+	char c;
+	const struct sockaddr_un coredump_sk = {
+		.sun_family = AF_UNIX,
+		.sun_path = "/tmp/coredump.socket",
+	};
+	size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
+				 sizeof("/tmp/coredump.socket");
+	/* The socket path gets bound but never listened on, so no coredump may be flagged. */
+	ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		int fd_server = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_no_listener: socket failed: %m\n");
+			goto out;
+		}
+		/* bind() only: this child never calls listen() and exits after signalling. */
+		ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+		if (ret < 0) {
+			fprintf(stderr, "socket_no_listener: bind failed: %m\n");
+			goto out;
+		}
+
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_no_listener: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_no_listener: completed successfully\n");
+out:
+		if (fd_server >= 0)
+			close(fd_server);
+		close(ipc_sockets[1]);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+	/* Wait until the path has been bound before crashing the child. */
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_FALSE(WCOREDUMP(status));
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via simple socket coredump
+ *
+ * Verify that when using simple socket-based coredump (@ pattern),
+ * the coredump_signal field is correctly exposed as SIGSEGV.
+ */
+TEST_F(coredump, socket_coredump_signal_sigsegv)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+	/* The server child reports over this socketpair once it is listening. */
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+
+		/* Verify coredump_signal is available and correct */
+		if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+			goto out;
+		}
+
+		if (info.coredump_signal != SIGSEGV) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: coredump_signal=%d, expected SIGSEGV=%d\n",
+				info.coredump_signal, SIGSEGV);
+			goto out;
+		}
+		/* Copy the socket data into a file obtained from the fixture's tmpfs fd. */
+		fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+		if (fd_core_file < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigsegv: open_coredump_tmpfile failed: %m\n");
+			goto out;
+		}
+
+		for (;;) {
+			char buffer[4096];
+			ssize_t bytes_read, bytes_write;
+
+			bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+			if (bytes_read < 0) {
+				fprintf(stderr, "socket_coredump_signal_sigsegv: read from coredump socket failed: %m\n");
+				goto out;
+			}
+
+			if (bytes_read == 0)
+				break;
+
+			bytes_write = write(fd_core_file, buffer, bytes_read);
+			if (bytes_read != bytes_write) {
+				fprintf(stderr, "socket_coredump_signal_sigsegv: write to core file failed (read=%zd, write=%zd): %m\n",
+					bytes_read, bytes_write);
+				goto out;
+			}
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_coredump_signal_sigsegv: completed successfully\n");
+out:
+		if (fd_core_file >= 0)
+			close(fd_core_file);
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+	/* crashing_child() ends by reading through a NULL pointer. */
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		crashing_child();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(WTERMSIG(status), SIGSEGV);
+	ASSERT_TRUE(WCOREDUMP(status));
+	/* The parent's own pidfd must report the same coredump signal as the server saw. */
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+	ASSERT_EQ(info.coredump_signal, SIGSEGV);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+/*
+ * Test: PIDFD_INFO_COREDUMP_SIGNAL via simple socket coredump with SIGABRT
+ *
+ * Verify that when using simple socket-based coredump (@ pattern),
+ * the coredump_signal field is correctly exposed as SIGABRT.
+ */
+TEST_F(coredump, socket_coredump_signal_sigabrt)
+{
+	int pidfd, ret, status;
+	pid_t pid, pid_coredump_server;
+	struct pidfd_info info = {};
+	int ipc_sockets[2];
+	char c;
+
+	ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
+	/* The server child reports over this socketpair once it is listening. */
+	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_EQ(ret, 0);
+
+	pid_coredump_server = fork();
+	ASSERT_GE(pid_coredump_server, 0);
+	if (pid_coredump_server == 0) {
+		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
+		int exit_code = EXIT_FAILURE;
+
+		close(ipc_sockets[0]);
+
+		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
+		if (fd_server < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: create_and_listen_unix_socket failed: %m\n");
+			goto out;
+		}
+
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: write_nointr to ipc socket failed: %m\n");
+			goto out;
+		}
+
+		close(ipc_sockets[1]);
+
+		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+		if (fd_coredump < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: accept4 failed: %m\n");
+			goto out;
+		}
+
+		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
+		if (fd_peer_pidfd < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: get_peer_pidfd failed\n");
+			goto out;
+		}
+
+		if (!get_pidfd_info(fd_peer_pidfd, &info)) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: get_pidfd_info failed\n");
+			goto out;
+		}
+
+		if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP not set in mask\n");
+			goto out;
+		}
+
+		if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_COREDUMPED not set in coredump_mask\n");
+			goto out;
+		}
+
+		/* Verify coredump_signal is available and correct */
+		if (!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL)) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: PIDFD_INFO_COREDUMP_SIGNAL not set in mask\n");
+			goto out;
+		}
+
+		if (info.coredump_signal != SIGABRT) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: coredump_signal=%d, expected SIGABRT=%d\n",
+				info.coredump_signal, SIGABRT);
+			goto out;
+		}
+		/* Copy the socket data into a file obtained from the fixture's tmpfs fd. */
+		fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
+		if (fd_core_file < 0) {
+			fprintf(stderr, "socket_coredump_signal_sigabrt: open_coredump_tmpfile failed: %m\n");
+			goto out;
+		}
+
+		for (;;) {
+			char buffer[4096];
+			ssize_t bytes_read, bytes_write;
+
+			bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+			if (bytes_read < 0) {
+				fprintf(stderr, "socket_coredump_signal_sigabrt: read from coredump socket failed: %m\n");
+				goto out;
+			}
+
+			if (bytes_read == 0)
+				break;
+
+			bytes_write = write(fd_core_file, buffer, bytes_read);
+			if (bytes_read != bytes_write) {
+				fprintf(stderr, "socket_coredump_signal_sigabrt: write to core file failed (read=%zd, write=%zd): %m\n",
+					bytes_read, bytes_write);
+				goto out;
+			}
+		}
+
+		exit_code = EXIT_SUCCESS;
+		fprintf(stderr, "socket_coredump_signal_sigabrt: completed successfully\n");
+out:
+		if (fd_core_file >= 0)
+			close(fd_core_file);
+		if (fd_peer_pidfd >= 0)
+			close(fd_peer_pidfd);
+		if (fd_coredump >= 0)
+			close(fd_coredump);
+		if (fd_server >= 0)
+			close(fd_server);
+		_exit(exit_code);
+	}
+	self->pid_coredump_server = pid_coredump_server;
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+	/* Unlike the other tests, this child calls abort() instead of crashing_child(). */
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0)
+		abort();
+
+	pidfd = sys_pidfd_open(pid, 0);
+	ASSERT_GE(pidfd, 0);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(WTERMSIG(status), SIGABRT);
+	ASSERT_TRUE(WCOREDUMP(status));
+	/* The parent's own pidfd must report the same coredump signal as the server saw. */
+	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_COREDUMP_SIGNAL));
+	ASSERT_EQ(info.coredump_signal, SIGABRT);
+
+	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
+}
+
+TEST_F(coredump, socket_invalid_paths)
+{
+	ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket"));	/* space after the '@' */
+	ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket"));	/* ".." inside the path */
+	ASSERT_FALSE(set_core_pattern("@../coredump.socket"));
+	ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/.."));
+	ASSERT_FALSE(set_core_pattern("@.."));
+	/* The same five patterns with the "@@" prefix must fail as well. */
+	ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket"));
+	ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket"));
+	ASSERT_FALSE(set_core_pattern("@@../coredump.socket"));
+	ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/.."));
+	ASSERT_FALSE(set_core_pattern("@@.."));
+	/* A third '@' in front of an otherwise plain path must fail too. */
+	ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket"));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/coredump/coredump_test.h b/tools/testing/selftests/coredump/coredump_test.h
new file mode 100644
index 000000000000..ed47f01fa53c
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_test.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __COREDUMP_TEST_H
+#define __COREDUMP_TEST_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+#include <linux/coredump.h>
+
+#include "../kselftest_harness.h"
+#include "../pidfd/pidfd.h"
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define NUM_THREAD_SPAWN 128
+
+/* Coredump fixture */
+FIXTURE(coredump)
+{
+	char original_core_pattern[256];	/* core_pattern text to write back after the test */
+	pid_t pid_coredump_server;		/* pid of the forked server child (fork() result) */
+	int fd_tmpfs_detached;			/* fd passed to open_coredump_tmpfile() */
+};
+
+/* Shared helper function declarations */
+void *do_nothing(void *arg);
+void crashing_child(void);
+int create_detached_tmpfs(void);
+int create_and_listen_unix_socket(const char *path);
+bool set_core_pattern(const char *pattern);
+int get_peer_pidfd(int fd);
+bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info);
+
+/* Inline (uses harness types): reap the server child and assert it exited with status 0. */
+static inline void wait_and_check_coredump_server(pid_t pid_coredump_server,
+						   struct __test_metadata *const _metadata,
+						   FIXTURE_DATA(coredump) *self)
+{
+	int status = -1;
+	pid_t reaped = waitpid(pid_coredump_server, &status, 0);
+	self->pid_coredump_server = -ESRCH;	/* mark the server as no longer running */
+	ASSERT_TRUE(reaped == pid_coredump_server && WIFEXITED(status));	/* status is only valid if the wait succeeded */
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
+/* Protocol helper function declarations */
+ssize_t recv_marker(int fd);
+bool read_marker(int fd, enum coredump_mark mark);
+bool read_coredump_req(int fd, struct coredump_req *req);
+bool send_coredump_ack(int fd, const struct coredump_req *req,
+		       __u64 mask, size_t size_ack);
+bool check_coredump_req(const struct coredump_req *req, size_t min_size,
+			__u64 required_mask);
+int open_coredump_tmpfile(int fd_tmpfs_detached);
+void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file);
+
+#endif /* __COREDUMP_TEST_H */
diff --git a/tools/testing/selftests/coredump/coredump_test_helpers.c b/tools/testing/selftests/coredump/coredump_test_helpers.c
new file mode 100644
index 000000000000..a6f6d5f2ae07
--- /dev/null
+++ b/tools/testing/selftests/coredump/coredump_test_helpers.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../filesystems/wrappers.h"
+#include "../pidfd/pidfd.h"
+
+/* Forward declarations to avoid including harness header */
+struct __test_metadata;
+
+/* Same members as FIXTURE(coredump) in coredump_test.h; NOTE(review): no user in this file. */
+struct _fixture_coredump_data {
+	char original_core_pattern[256];
+	pid_t pid_coredump_server;
+	int fd_tmpfs_detached;
+};
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#define NUM_THREAD_SPAWN 128
+
+/* Thread body that parks forever in pause(); the argument is ignored. */
+void *do_nothing(void *arg)
+{
+	(void)arg;
+	for (;;)
+		pause();
+	return NULL;
+}
+
+/* Start NUM_THREAD_SPAWN idle threads, then dereference NULL to crash on purpose. */
+void crashing_child(void)
+{
+	pthread_t tid;
+	int i = 0;
+
+	while (i++ < NUM_THREAD_SPAWN)
+		pthread_create(&tid, NULL, do_nothing, NULL);
+	/* Deliberate NULL dereference: this is the crash the callers wait for. */
+	i = *(int *)NULL;
+}
+
+/* Create a tmpfs and return the fd from sys_fsmount(), or a negative value on error. */
+int create_detached_tmpfs(void)
+{
+	int fd_tmpfs = -1;
+	int fd_context = sys_fsopen("tmpfs", 0);
+
+	if (fd_context < 0)
+		return -1;
+
+	/* Mount only if the create step worked; the context fd is closed on every path. */
+	if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0) >= 0)
+		fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+	close(fd_context);
+	return fd_tmpfs;
+}
+
+/*
+ * Create an AF_UNIX SOCK_STREAM | SOCK_CLOEXEC socket, bind it to @path and
+ * listen on it with a backlog of 128.
+ * Returns the listening fd, or -1 if socket(), bind() or listen() fails.
+ */
+int create_and_listen_unix_socket(const char *path)
+{
+	struct sockaddr_un addr = { .sun_family = AF_UNIX };
+	const size_t path_len = strlen(path);
+	size_t addr_len;
+	int fd;
+
+	/* The path and its terminating NUL have to fit into sun_path. */
+	assert(path_len < sizeof(addr.sun_path) - 1);
+	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
+	addr_len = offsetof(struct sockaddr_un, sun_path) + path_len + 1;
+
+	fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+	if (fd < 0)
+		return -1;
+
+	/* From here on every failure has to release the socket again. */
+	if (bind(fd, (const struct sockaddr *)&addr, addr_len) < 0 ||
+	    listen(fd, 128) < 0) {
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+/* Write @pattern to /proc/sys/kernel/core_pattern; true only if it was written in full. */
+bool set_core_pattern(const char *pattern)
+{
+	const size_t len = strlen(pattern);
+	ssize_t ret;	/* signed, hence logged with %zd below */
+	int fd;
+
+	fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC);
+	if (fd < 0)
+		return false;
+	ret = write(fd, pattern, len);
+	close(fd);
+	if (ret < 0)
+		return false;
+	fprintf(stderr, "Set core_pattern to '%s' | %zd == %zu\n", pattern, ret, len);
+	return (size_t)ret == len;
+}
+
+/* Return the pidfd that SO_PEERPIDFD reports for socket @fd, or -1 on failure. */
+int get_peer_pidfd(int fd)
+{
+	int pidfd;
+	socklen_t optlen = sizeof(pidfd);
+
+	if (getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &pidfd, &optlen) < 0) {
+		fprintf(stderr, "get_peer_pidfd: getsockopt(SO_PEERPIDFD) failed: %m\n");
+		return -1;
+	}
+	fprintf(stderr, "get_peer_pidfd: successfully retrieved pidfd %d\n", pidfd);
+	return pidfd;
+}
+
+/* Zero @info, ask PIDFD_GET_INFO for exit and coredump data; false if the ioctl fails. */
+bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info)
+{
+	memset(info, 0, sizeof(*info));
+	info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP | PIDFD_INFO_COREDUMP_SIGNAL;
+
+	if (ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info) < 0) {
+		fprintf(stderr, "get_pidfd_info: ioctl(PIDFD_GET_INFO) failed: %m\n");
+		return false;
+	}
+	fprintf(stderr, "get_pidfd_info: mask=0x%llx, coredump_mask=0x%x, coredump_signal=%d\n",
+		(unsigned long long)info->mask, info->coredump_mask, info->coredump_signal);
+	return true;
+}
+
+/* Protocol helper functions */
+
+/* Receive one coredump marker from @fd and log it; returns the marker, or -1 on error. */
+ssize_t recv_marker(int fd)
+{
+	enum coredump_mark mark = COREDUMP_MARK_REQACK;
+
+	if (recv(fd, &mark, sizeof(mark), MSG_WAITALL) != sizeof(mark))
+		return -1;
+
+	switch (mark) {
+	case COREDUMP_MARK_REQACK:
+		fprintf(stderr, "Received marker: ReqAck\n");
+		break;
+	case COREDUMP_MARK_MINSIZE:
+		fprintf(stderr, "Received marker: MinSize\n");
+		break;
+	case COREDUMP_MARK_MAXSIZE:
+		fprintf(stderr, "Received marker: MaxSize\n");
+		break;
+	case COREDUMP_MARK_UNSUPPORTED:
+		fprintf(stderr, "Received marker: Unsupported\n");
+		break;
+	case COREDUMP_MARK_CONFLICTING:
+		fprintf(stderr, "Received marker: Conflicting\n");
+		break;
+	default:
+		fprintf(stderr, "Received unknown marker: %u\n", mark);
+		return -1;
+	}
+	/* Every recognised marker is handed back to the caller unchanged. */
+	return mark;
+}
+
+/* True iff recv_marker() succeeds on @fd and the marker it returns equals @mark. */
+bool read_marker(int fd, enum coredump_mark mark)
+{
+	const ssize_t received = recv_marker(fd);
+
+	if (received < 0)
+		return false;
+	return received == mark;
+}
+
+/*
+ * Read the kernel's struct coredump_req from @fd into @req. The size field is
+ * peeked first so that a request larger than the struct known here can have
+ * its tail discarded. Returns false on a short read or an out-of-range size.
+ */
+bool read_coredump_req(int fd, struct coredump_req *req)
+{
+	ssize_t ret;
+	size_t field_size, user_size, ack_size, kernel_size, remaining_size;
+
+	memset(req, 0, sizeof(*req));
+	field_size = sizeof(req->size);
+
+	/* Peek the size of the coredump request. */
+	ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL);
+	if (ret != field_size) {
+		fprintf(stderr, "read_coredump_req: peek failed (got %zd, expected %zu): %m\n",
+			ret, field_size);
+		return false;
+	}
+	kernel_size = req->size;
+
+	if (kernel_size < COREDUMP_ACK_SIZE_VER0) {
+		fprintf(stderr, "read_coredump_req: kernel_size %zu < min %d\n",
+			kernel_size, COREDUMP_ACK_SIZE_VER0);
+		return false;
+	}
+	if (kernel_size >= PAGE_SIZE) {
+		fprintf(stderr, "read_coredump_req: kernel_size %zu >= PAGE_SIZE %d\n",
+			kernel_size, PAGE_SIZE);
+		return false;
+	}
+
+	/* Use the minimum of user and kernel size to read the full request. */
+	user_size = sizeof(struct coredump_req);
+	ack_size = user_size < kernel_size ? user_size : kernel_size;
+	ret = recv(fd, req, ack_size, MSG_WAITALL);
+	if (ret != ack_size)
+		return false;
+
+	fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n",
+		req->size, (unsigned long long)req->mask);
+
+	/*
+	 * Only a request larger than our struct leaves unread bytes behind; a
+	 * smaller one was consumed in full above. kernel_size < PAGE_SIZE already
+	 * guarantees that the tail fits into the buffer.
+	 */
+	remaining_size = kernel_size - ack_size;
+	if (remaining_size) {
+		char buffer[PAGE_SIZE];
+
+		/* Ask for exactly the tail so that MSG_WAITALL cannot wait for more. */
+		ret = recv(fd, buffer, remaining_size, MSG_WAITALL);
+		if (ret != remaining_size)
+			return false;
+		fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size);
+	}
+
+	return true;
+}
+
+/*
+ * Send an ack carrying @mask on @fd. A zero @size_ack means the smaller of
+ * sizeof(struct coredump_ack) and @req->size_ack. True iff all bytes went out.
+ */
+bool send_coredump_ack(int fd, const struct coredump_req *req,
+		       __u64 mask, size_t size_ack)
+{
+	/*
+	 * The ack sits at the start of a padded struct so that a caller can
+	 * simulate sending too much data to the kernel.
+	 */
+	struct large_ack_for_size_testing {
+		struct coredump_ack ack;
+		char buffer[PAGE_SIZE];
+	} large_ack = {};
+	const size_t known_size = sizeof(struct coredump_ack);
+
+	if (!size_ack)
+		size_ack = known_size < req->size_ack ? known_size : req->size_ack;
+	large_ack.ack.mask = mask;
+	large_ack.ack.size = size_ack;
+	if (send(fd, &large_ack, size_ack, MSG_NOSIGNAL) != size_ack)
+		return false;
+	fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n",
+		size_ack, (unsigned long long)mask);
+	return true;
+}
+
+/*
+ * True iff @req->size >= @min_size and @req->mask is exactly @required_mask.
+ */
+bool check_coredump_req(const struct coredump_req *req, size_t min_size,
+			__u64 required_mask)
+{
+	const bool has_all_bits = (req->mask & required_mask) == required_mask;
+	const bool has_no_extras = !(req->mask & ~required_mask);
+
+	return req->size >= min_size && has_all_bits && has_no_extras;
+}
+
+int open_coredump_tmpfile(int fd_tmpfs_detached)
+{	/* Unnamed (O_TMPFILE | O_EXCL) read-write file, mode 0600, in directory @fd_tmpfs_detached. */
+	return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
+}
+
+/* Copy @fd_coredump to @fd_core_file until EOF, then close every fd and _exit(). */
+void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file)
+{
+	struct epoll_event ev = { .events = EPOLLIN | EPOLLRDHUP | EPOLLET };
+	int flags, epfd = -1, exit_code = EXIT_FAILURE;
+
+	/* Set socket to non-blocking mode for edge-triggered epoll */
+	flags = fcntl(fd_coredump, F_GETFL, 0);
+	if (flags < 0) {
+		fprintf(stderr, "Worker: fcntl(F_GETFL) failed: %m\n");
+		goto out;
+	}
+	if (fcntl(fd_coredump, F_SETFL, flags | O_NONBLOCK) < 0) {
+		fprintf(stderr, "Worker: fcntl(F_SETFL, O_NONBLOCK) failed: %m\n");
+		goto out;
+	}
+
+	epfd = epoll_create1(0);
+	if (epfd < 0) {
+		fprintf(stderr, "Worker: epoll_create1() failed: %m\n");
+		goto out;
+	}
+
+	ev.data.fd = fd_coredump;
+	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0) {
+		fprintf(stderr, "Worker: epoll_ctl(EPOLL_CTL_ADD) failed: %m\n");
+		goto out;
+	}
+
+	for (;;) {
+		struct epoll_event events[1];
+		int n = epoll_wait(epfd, events, 1, -1);
+		if (n < 0) {
+			/* Leaving via "done" here would report a failed wait as success. */
+			fprintf(stderr, "Worker: epoll_wait() failed: %m\n");
+			goto out;
+		}
+
+		if (events[0].events & (EPOLLIN | EPOLLRDHUP)) {
+			for (;;) {
+				char buffer[4096];
+				ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+				if (bytes_read < 0) {
+					if (errno == EAGAIN || errno == EWOULDBLOCK)
+						break;
+					fprintf(stderr, "Worker: read() failed: %m\n");
+					goto out;
+				}
+				if (bytes_read == 0)
+					goto done;
+				ssize_t bytes_write = write(fd_core_file, buffer, bytes_read);
+				if (bytes_write != bytes_read) {
+					/* A full core file is tolerated: keep draining the socket. */
+					if (bytes_write < 0 && errno == ENOSPC)
+						continue;
+					fprintf(stderr, "Worker: write() failed (read=%zd, write=%zd): %m\n",
+						bytes_read, bytes_write);
+					goto out;
+				}
+			}
+		}
+	}
+
+done:
+	exit_code = EXIT_SUCCESS;
+	fprintf(stderr, "Worker: completed successfully\n");
+out:
+	if (epfd >= 0)
+		close(epfd);
+	if (fd_core_file >= 0)
+		close(fd_core_file);
+	if (fd_peer_pidfd >= 0)
+		close(fd_peer_pidfd);
+	if (fd_coredump >= 0)
+		close(fd_coredump);
+	_exit(exit_code);
+}
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index a4ac80bb1003..c2e895bcc160 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -23,57 +23,15 @@
 #include "../filesystems/wrappers.h"
 #include "../pidfd/pidfd.h"
 
+#include "coredump_test.h"
+
 #define STACKDUMP_FILE "stack_values"
 #define STACKDUMP_SCRIPT "stackdump"
-#define NUM_THREAD_SPAWN 128
 
 #ifndef PAGE_SIZE
 #define PAGE_SIZE 4096
 #endif
 
-static void *do_nothing(void *)
-{
-	while (1)
-		pause();
-
-	return NULL;
-}
-
-static void crashing_child(void)
-{
-	pthread_t thread;
-	int i;
-
-	for (i = 0; i < NUM_THREAD_SPAWN; ++i)
-		pthread_create(&thread, NULL, do_nothing, NULL);
-
-	/* crash on purpose */
-	i = *(int *)NULL;
-}
-
-FIXTURE(coredump)
-{
-	char original_core_pattern[256];
-	pid_t pid_coredump_server;
-	int fd_tmpfs_detached;
-};
-
-static int create_detached_tmpfs(void)
-{
-	int fd_context, fd_tmpfs;
-
-	fd_context = sys_fsopen("tmpfs", 0);
-	if (fd_context < 0)
-		return -1;
-
-	if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0)
-		return -1;
-
-	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
-	close(fd_context);
-	return fd_tmpfs;
-}
-
 FIXTURE_SETUP(coredump)
 {
 	FILE *file;
@@ -208,1620 +166,4 @@ TEST_F_TIMEOUT(coredump, stackdump, 120)
 	fclose(file);
 }
 
-static int create_and_listen_unix_socket(const char *path)
-{
-	struct sockaddr_un addr = {
-		.sun_family = AF_UNIX,
-	};
-	assert(strlen(path) < sizeof(addr.sun_path) - 1);
-	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);
-	size_t addr_len =
-		offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1;
-	int fd, ret;
-
-	fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
-	if (fd < 0)
-		goto out;
-
-	ret = bind(fd, (const struct sockaddr *)&addr, addr_len);
-	if (ret < 0)
-		goto out;
-
-	ret = listen(fd, 128);
-	if (ret < 0)
-		goto out;
-
-	return fd;
-
-out:
-	if (fd >= 0)
-		close(fd);
-	return -1;
-}
-
-static bool set_core_pattern(const char *pattern)
-{
-	int fd;
-	ssize_t ret;
-
-	fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC);
-	if (fd < 0)
-		return false;
-
-	ret = write(fd, pattern, strlen(pattern));
-	close(fd);
-	if (ret < 0)
-		return false;
-
-	fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern));
-	return ret == strlen(pattern);
-}
-
-static int get_peer_pidfd(int fd)
-{
-	int fd_peer_pidfd;
-	socklen_t fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
-	int ret = getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd,
-			     &fd_peer_pidfd_len);
-	if (ret < 0) {
-		fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
-		return -1;
-	}
-	return fd_peer_pidfd;
-}
-
-static bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info)
-{
-	memset(info, 0, sizeof(*info));
-	info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
-	return ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info) == 0;
-}
-
-static void
-wait_and_check_coredump_server(pid_t pid_coredump_server,
-			       struct __test_metadata *const _metadata,
-			       FIXTURE_DATA(coredump)* self)
-{
-	int status;
-	waitpid(pid_coredump_server, &status, 0);
-	self->pid_coredump_server = -ESRCH;
-	ASSERT_TRUE(WIFEXITED(status));
-	ASSERT_EQ(WEXITSTATUS(status), 0);
-}
-
-TEST_F(coredump, socket)
-{
-	int pidfd, ret, status;
-	pid_t pid, pid_coredump_server;
-	struct stat st;
-	struct pidfd_info info = {};
-	int ipc_sockets[2];
-	char c;
-
-	ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
-
-	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
-	ASSERT_EQ(ret, 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
-		int exit_code = EXIT_FAILURE;
-
-		close(ipc_sockets[0]);
-
-		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
-		if (fd_server < 0)
-			goto out;
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
-			goto out;
-
-		close(ipc_sockets[1]);
-
-		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
-		if (fd_coredump < 0)
-			goto out;
-
-		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
-		if (fd_peer_pidfd < 0)
-			goto out;
-
-		if (!get_pidfd_info(fd_peer_pidfd, &info))
-			goto out;
-
-		if (!(info.mask & PIDFD_INFO_COREDUMP))
-			goto out;
-
-		if (!(info.coredump_mask & PIDFD_COREDUMPED))
-			goto out;
-
-		fd_core_file = creat("/tmp/coredump.file", 0644);
-		if (fd_core_file < 0)
-			goto out;
-
-		for (;;) {
-			char buffer[4096];
-			ssize_t bytes_read, bytes_write;
-
-			bytes_read = read(fd_coredump, buffer, sizeof(buffer));
-			if (bytes_read < 0)
-				goto out;
-
-			if (bytes_read == 0)
-				break;
-
-			bytes_write = write(fd_core_file, buffer, bytes_read);
-			if (bytes_read != bytes_write)
-				goto out;
-		}
-
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_core_file >= 0)
-			close(fd_core_file);
-		if (fd_peer_pidfd >= 0)
-			close(fd_peer_pidfd);
-		if (fd_coredump >= 0)
-			close(fd_coredump);
-		if (fd_server >= 0)
-			close(fd_server);
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	pid = fork();
-	ASSERT_GE(pid, 0);
-	if (pid == 0)
-		crashing_child();
-
-	pidfd = sys_pidfd_open(pid, 0);
-	ASSERT_GE(pidfd, 0);
-
-	waitpid(pid, &status, 0);
-	ASSERT_TRUE(WIFSIGNALED(status));
-	ASSERT_TRUE(WCOREDUMP(status));
-
-	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
-	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
-	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-
-	ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
-	ASSERT_GT(st.st_size, 0);
-	system("file /tmp/coredump.file");
-}
-
-TEST_F(coredump, socket_detect_userspace_client)
-{
-	int pidfd, ret, status;
-	pid_t pid, pid_coredump_server;
-	struct stat st;
-	struct pidfd_info info = {};
-	int ipc_sockets[2];
-	char c;
-
-	ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
-
-	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
-	ASSERT_EQ(ret, 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
-		int exit_code = EXIT_FAILURE;
-
-		close(ipc_sockets[0]);
-
-		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
-		if (fd_server < 0)
-			goto out;
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
-			goto out;
-
-		close(ipc_sockets[1]);
-
-		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
-		if (fd_coredump < 0)
-			goto out;
-
-		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
-		if (fd_peer_pidfd < 0)
-			goto out;
-
-		if (!get_pidfd_info(fd_peer_pidfd, &info))
-			goto out;
-
-		if (!(info.mask & PIDFD_INFO_COREDUMP))
-			goto out;
-
-		if (info.coredump_mask & PIDFD_COREDUMPED)
-			goto out;
-
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_peer_pidfd >= 0)
-			close(fd_peer_pidfd);
-		if (fd_coredump >= 0)
-			close(fd_coredump);
-		if (fd_server >= 0)
-			close(fd_server);
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	pid = fork();
-	ASSERT_GE(pid, 0);
-	if (pid == 0) {
-		int fd_socket;
-		ssize_t ret;
-		const struct sockaddr_un coredump_sk = {
-			.sun_family = AF_UNIX,
-			.sun_path = "/tmp/coredump.socket",
-		};
-		size_t coredump_sk_len =
-			offsetof(struct sockaddr_un, sun_path) +
-			sizeof("/tmp/coredump.socket");
-
-		fd_socket = socket(AF_UNIX, SOCK_STREAM, 0);
-		if (fd_socket < 0)
-			_exit(EXIT_FAILURE);
-
-		ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
-		if (ret < 0)
-			_exit(EXIT_FAILURE);
-
-		close(fd_socket);
-		_exit(EXIT_SUCCESS);
-	}
-
-	pidfd = sys_pidfd_open(pid, 0);
-	ASSERT_GE(pidfd, 0);
-
-	waitpid(pid, &status, 0);
-	ASSERT_TRUE(WIFEXITED(status));
-	ASSERT_EQ(WEXITSTATUS(status), 0);
-
-	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
-	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
-	ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-
-	ASSERT_NE(stat("/tmp/coredump.file", &st), 0);
-	ASSERT_EQ(errno, ENOENT);
-}
-
-TEST_F(coredump, socket_enoent)
-{
-	int pidfd, status;
-	pid_t pid;
-
-	ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
-
-	pid = fork();
-	ASSERT_GE(pid, 0);
-	if (pid == 0)
-		crashing_child();
-
-	pidfd = sys_pidfd_open(pid, 0);
-	ASSERT_GE(pidfd, 0);
-
-	waitpid(pid, &status, 0);
-	ASSERT_TRUE(WIFSIGNALED(status));
-	ASSERT_FALSE(WCOREDUMP(status));
-}
-
-TEST_F(coredump, socket_no_listener)
-{
-	int pidfd, ret, status;
-	pid_t pid, pid_coredump_server;
-	int ipc_sockets[2];
-	char c;
-	const struct sockaddr_un coredump_sk = {
-		.sun_family = AF_UNIX,
-		.sun_path = "/tmp/coredump.socket",
-	};
-	size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
-				 sizeof("/tmp/coredump.socket");
-
-	ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket"));
-
-	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
-	ASSERT_EQ(ret, 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		int fd_server = -1;
-		int exit_code = EXIT_FAILURE;
-
-		close(ipc_sockets[0]);
-
-		fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
-		if (fd_server < 0)
-			goto out;
-
-		ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
-		if (ret < 0)
-			goto out;
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
-			goto out;
-
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_server >= 0)
-			close(fd_server);
-		close(ipc_sockets[1]);
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	pid = fork();
-	ASSERT_GE(pid, 0);
-	if (pid == 0)
-		crashing_child();
-
-	pidfd = sys_pidfd_open(pid, 0);
-	ASSERT_GE(pidfd, 0);
-
-	waitpid(pid, &status, 0);
-	ASSERT_TRUE(WIFSIGNALED(status));
-	ASSERT_FALSE(WCOREDUMP(status));
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-static ssize_t recv_marker(int fd)
-{
-	enum coredump_mark mark = COREDUMP_MARK_REQACK;
-	ssize_t ret;
-
-	ret = recv(fd, &mark, sizeof(mark), MSG_WAITALL);
-	if (ret != sizeof(mark))
-		return -1;
-
-	switch (mark) {
-	case COREDUMP_MARK_REQACK:
-		fprintf(stderr, "Received marker: ReqAck\n");
-		return COREDUMP_MARK_REQACK;
-	case COREDUMP_MARK_MINSIZE:
-		fprintf(stderr, "Received marker: MinSize\n");
-		return COREDUMP_MARK_MINSIZE;
-	case COREDUMP_MARK_MAXSIZE:
-		fprintf(stderr, "Received marker: MaxSize\n");
-		return COREDUMP_MARK_MAXSIZE;
-	case COREDUMP_MARK_UNSUPPORTED:
-		fprintf(stderr, "Received marker: Unsupported\n");
-		return COREDUMP_MARK_UNSUPPORTED;
-	case COREDUMP_MARK_CONFLICTING:
-		fprintf(stderr, "Received marker: Conflicting\n");
-		return COREDUMP_MARK_CONFLICTING;
-	default:
-		fprintf(stderr, "Received unknown marker: %u\n", mark);
-		break;
-	}
-	return -1;
-}
-
-static bool read_marker(int fd, enum coredump_mark mark)
-{
-	ssize_t ret;
-
-	ret = recv_marker(fd);
-	if (ret < 0)
-		return false;
-	return ret == mark;
-}
-
-static bool read_coredump_req(int fd, struct coredump_req *req)
-{
-	ssize_t ret;
-	size_t field_size, user_size, ack_size, kernel_size, remaining_size;
-
-	memset(req, 0, sizeof(*req));
-	field_size = sizeof(req->size);
-
-	/* Peek the size of the coredump request. */
-	ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL);
-	if (ret != field_size)
-		return false;
-	kernel_size = req->size;
-
-	if (kernel_size < COREDUMP_ACK_SIZE_VER0)
-		return false;
-	if (kernel_size >= PAGE_SIZE)
-		return false;
-
-	/* Use the minimum of user and kernel size to read the full request. */
-	user_size = sizeof(struct coredump_req);
-	ack_size = user_size < kernel_size ? user_size : kernel_size;
-	ret = recv(fd, req, ack_size, MSG_WAITALL);
-	if (ret != ack_size)
-		return false;
-
-	fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n",
-		req->size, (unsigned long long)req->mask);
-
-	if (user_size > kernel_size)
-		remaining_size = user_size - kernel_size;
-	else
-		remaining_size = kernel_size - user_size;
-
-	if (PAGE_SIZE <= remaining_size)
-		return false;
-
-	/*
-	 * Discard any additional data if the kernel's request was larger than
-	 * what we knew about or cared about.
-	 */
-	if (remaining_size) {
-		char buffer[PAGE_SIZE];
-
-		ret = recv(fd, buffer, sizeof(buffer), MSG_WAITALL);
-		if (ret != remaining_size)
-			return false;
-		fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size);
-	}
-
-	return true;
-}
-
-static bool send_coredump_ack(int fd, const struct coredump_req *req,
-			      __u64 mask, size_t size_ack)
-{
-	ssize_t ret;
-	/*
-	 * Wrap struct coredump_ack in a larger struct so we can
-	 * simulate sending to much data to the kernel.
-	 */
-	struct large_ack_for_size_testing {
-		struct coredump_ack ack;
-		char buffer[PAGE_SIZE];
-	} large_ack = {};
-
-	if (!size_ack)
-		size_ack = sizeof(struct coredump_ack) < req->size_ack ?
-				   sizeof(struct coredump_ack) :
-				   req->size_ack;
-	large_ack.ack.mask = mask;
-	large_ack.ack.size = size_ack;
-	ret = send(fd, &large_ack, size_ack, MSG_NOSIGNAL);
-	if (ret != size_ack)
-		return false;
-
-	fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n",
-		size_ack, (unsigned long long)mask);
-	return true;
-}
-
-static bool check_coredump_req(const struct coredump_req *req, size_t min_size,
-			       __u64 required_mask)
-{
-	if (req->size < min_size)
-		return false;
-	if ((req->mask & required_mask) != required_mask)
-		return false;
-	if (req->mask & ~required_mask)
-		return false;
-	return true;
-}
-
-TEST_F(coredump, socket_request_kernel)
-{
-	int pidfd, ret, status;
-	pid_t pid, pid_coredump_server;
-	struct stat st;
-	struct pidfd_info info = {};
-	int ipc_sockets[2];
-	char c;
-
-	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
-	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
-	ASSERT_EQ(ret, 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		struct coredump_req req = {};
-		int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1;
-		int exit_code = EXIT_FAILURE;
-
-		close(ipc_sockets[0]);
-
-		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
-		if (fd_server < 0)
-			goto out;
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
-			goto out;
-
-		close(ipc_sockets[1]);
-
-		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
-		if (fd_coredump < 0)
-			goto out;
-
-		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
-		if (fd_peer_pidfd < 0)
-			goto out;
-
-		if (!get_pidfd_info(fd_peer_pidfd, &info))
-			goto out;
-
-		if (!(info.mask & PIDFD_INFO_COREDUMP))
-			goto out;
-
-		if (!(info.coredump_mask & PIDFD_COREDUMPED))
-			goto out;
-
-		fd_core_file = creat("/tmp/coredump.file", 0644);
-		if (fd_core_file < 0)
-			goto out;
-
-		if (!read_coredump_req(fd_coredump, &req))
-			goto out;
-
-		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
-					COREDUMP_KERNEL | COREDUMP_USERSPACE |
-					COREDUMP_REJECT | COREDUMP_WAIT))
-			goto out;
-
-		if (!send_coredump_ack(fd_coredump, &req,
-				       COREDUMP_KERNEL | COREDUMP_WAIT, 0))
-			goto out;
-
-		if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
-			goto out;
-
-		for (;;) {
-			char buffer[4096];
-			ssize_t bytes_read, bytes_write;
-
-			bytes_read = read(fd_coredump, buffer, sizeof(buffer));
-			if (bytes_read < 0)
-				goto out;
-
-			if (bytes_read == 0)
-				break;
-
-			bytes_write = write(fd_core_file, buffer, bytes_read);
-			if (bytes_read != bytes_write)
-				goto out;
-		}
-
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_core_file >= 0)
-			close(fd_core_file);
-		if (fd_peer_pidfd >= 0)
-			close(fd_peer_pidfd);
-		if (fd_coredump >= 0)
-			close(fd_coredump);
-		if (fd_server >= 0)
-			close(fd_server);
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	pid = fork();
-	ASSERT_GE(pid, 0);
-	if (pid == 0)
-		crashing_child();
-
-	pidfd = sys_pidfd_open(pid, 0);
-	ASSERT_GE(pidfd, 0);
-
-	waitpid(pid, &status, 0);
-	ASSERT_TRUE(WIFSIGNALED(status));
-	ASSERT_TRUE(WCOREDUMP(status));
-
-	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
-	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
-	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-
-	ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
-	ASSERT_GT(st.st_size, 0);
-	system("file /tmp/coredump.file");
-}
-
-TEST_F(coredump, socket_request_userspace)
-{
-	int pidfd, ret, status;
-	pid_t pid, pid_coredump_server;
-	struct pidfd_info info = {};
-	int ipc_sockets[2];
-	char c;
-
-	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
-	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
-	ASSERT_EQ(ret, 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		struct coredump_req req = {};
-		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
-		int exit_code = EXIT_FAILURE;
-
-		close(ipc_sockets[0]);
-
-		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
-		if (fd_server < 0)
-			goto out;
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
-			goto out;
-
-		close(ipc_sockets[1]);
-
-		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
-		if (fd_coredump < 0)
-			goto out;
-
-		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
-		if (fd_peer_pidfd < 0)
-			goto out;
-
-		if (!get_pidfd_info(fd_peer_pidfd, &info))
-			goto out;
-
-		if (!(info.mask & PIDFD_INFO_COREDUMP))
-			goto out;
-
-		if (!(info.coredump_mask & PIDFD_COREDUMPED))
-			goto out;
-
-		if (!read_coredump_req(fd_coredump, &req))
-			goto out;
-
-		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
-					COREDUMP_KERNEL | COREDUMP_USERSPACE |
-					COREDUMP_REJECT | COREDUMP_WAIT))
-			goto out;
-
-		if (!send_coredump_ack(fd_coredump, &req,
-				       COREDUMP_USERSPACE | COREDUMP_WAIT, 0))
-			goto out;
-
-		if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
-			goto out;
-
-		for (;;) {
-			char buffer[4096];
-			ssize_t bytes_read;
-
-			bytes_read = read(fd_coredump, buffer, sizeof(buffer));
-			if (bytes_read > 0)
-				goto out;
-
-			if (bytes_read < 0)
-				goto out;
-
-			if (bytes_read == 0)
-				break;
-		}
-
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_peer_pidfd >= 0)
-			close(fd_peer_pidfd);
-		if (fd_coredump >= 0)
-			close(fd_coredump);
-		if (fd_server >= 0)
-			close(fd_server);
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	pid = fork();
-	ASSERT_GE(pid, 0);
-	if (pid == 0)
-		crashing_child();
-
-	pidfd = sys_pidfd_open(pid, 0);
-	ASSERT_GE(pidfd, 0);
-
-	waitpid(pid, &status, 0);
-	ASSERT_TRUE(WIFSIGNALED(status));
-	ASSERT_TRUE(WCOREDUMP(status));
-
-	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
-	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
-	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_request_reject)
-{
-	int pidfd, ret, status;
-	pid_t pid, pid_coredump_server;
-	struct pidfd_info info = {};
-	int ipc_sockets[2];
-	char c;
-
-	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
-	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
-	ASSERT_EQ(ret, 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		struct coredump_req req = {};
-		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
-		int exit_code = EXIT_FAILURE;
-
-		close(ipc_sockets[0]);
-
-		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
-		if (fd_server < 0)
-			goto out;
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
-			goto out;
-
-		close(ipc_sockets[1]);
-
-		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
-		if (fd_coredump < 0)
-			goto out;
-
-		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
-		if (fd_peer_pidfd < 0)
-			goto out;
-
-		if (!get_pidfd_info(fd_peer_pidfd, &info))
-			goto out;
-
-		if (!(info.mask & PIDFD_INFO_COREDUMP))
-			goto out;
-
-		if (!(info.coredump_mask & PIDFD_COREDUMPED))
-			goto out;
-
-		if (!read_coredump_req(fd_coredump, &req))
-			goto out;
-
-		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
-					COREDUMP_KERNEL | COREDUMP_USERSPACE |
-					COREDUMP_REJECT | COREDUMP_WAIT))
-			goto out;
-
-		if (!send_coredump_ack(fd_coredump, &req,
-				       COREDUMP_REJECT | COREDUMP_WAIT, 0))
-			goto out;
-
-		if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
-			goto out;
-
-		for (;;) {
-			char buffer[4096];
-			ssize_t bytes_read;
-
-			bytes_read = read(fd_coredump, buffer, sizeof(buffer));
-			if (bytes_read > 0)
-				goto out;
-
-			if (bytes_read < 0)
-				goto out;
-
-			if (bytes_read == 0)
-				break;
-		}
-
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_peer_pidfd >= 0)
-			close(fd_peer_pidfd);
-		if (fd_coredump >= 0)
-			close(fd_coredump);
-		if (fd_server >= 0)
-			close(fd_server);
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	pid = fork();
-	ASSERT_GE(pid, 0);
-	if (pid == 0)
-		crashing_child();
-
-	pidfd = sys_pidfd_open(pid, 0);
-	ASSERT_GE(pidfd, 0);
-
-	waitpid(pid, &status, 0);
-	ASSERT_TRUE(WIFSIGNALED(status));
-	ASSERT_FALSE(WCOREDUMP(status));
-
-	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
-	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
-	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_request_invalid_flag_combination)
-{
-	int pidfd, ret, status;
-	pid_t pid, pid_coredump_server;
-	struct pidfd_info info = {};
-	int ipc_sockets[2];
-	char c;
-
-	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
-	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
-	ASSERT_EQ(ret, 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		struct coredump_req req = {};
-		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
-		int exit_code = EXIT_FAILURE;
-
-		close(ipc_sockets[0]);
-
-		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
-		if (fd_server < 0)
-			goto out;
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
-			goto out;
-
-		close(ipc_sockets[1]);
-
-		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
-		if (fd_coredump < 0)
-			goto out;
-
-		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
-		if (fd_peer_pidfd < 0)
-			goto out;
-
-		if (!get_pidfd_info(fd_peer_pidfd, &info))
-			goto out;
-
-		if (!(info.mask & PIDFD_INFO_COREDUMP))
-			goto out;
-
-		if (!(info.coredump_mask & PIDFD_COREDUMPED))
-			goto out;
-
-		if (!read_coredump_req(fd_coredump, &req))
-			goto out;
-
-		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
-					COREDUMP_KERNEL | COREDUMP_USERSPACE |
-					COREDUMP_REJECT | COREDUMP_WAIT))
-			goto out;
-
-		if (!send_coredump_ack(fd_coredump, &req,
-				       COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0))
-			goto out;
-
-		if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING))
-			goto out;
-
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_peer_pidfd >= 0)
-			close(fd_peer_pidfd);
-		if (fd_coredump >= 0)
-			close(fd_coredump);
-		if (fd_server >= 0)
-			close(fd_server);
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	pid = fork();
-	ASSERT_GE(pid, 0);
-	if (pid == 0)
-		crashing_child();
-
-	pidfd = sys_pidfd_open(pid, 0);
-	ASSERT_GE(pidfd, 0);
-
-	waitpid(pid, &status, 0);
-	ASSERT_TRUE(WIFSIGNALED(status));
-	ASSERT_FALSE(WCOREDUMP(status));
-
-	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
-	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
-	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_request_unknown_flag)
-{
-	int pidfd, ret, status;
-	pid_t pid, pid_coredump_server;
-	struct pidfd_info info = {};
-	int ipc_sockets[2];
-	char c;
-
-	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
-	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
-	ASSERT_EQ(ret, 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		struct coredump_req req = {};
-		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
-		int exit_code = EXIT_FAILURE;
-
-		close(ipc_sockets[0]);
-
-		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
-		if (fd_server < 0)
-			goto out;
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
-			goto out;
-
-		close(ipc_sockets[1]);
-
-		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
-		if (fd_coredump < 0)
-			goto out;
-
-		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
-		if (fd_peer_pidfd < 0)
-			goto out;
-
-		if (!get_pidfd_info(fd_peer_pidfd, &info))
-			goto out;
-
-		if (!(info.mask & PIDFD_INFO_COREDUMP))
-			goto out;
-
-		if (!(info.coredump_mask & PIDFD_COREDUMPED))
-			goto out;
-
-		if (!read_coredump_req(fd_coredump, &req))
-			goto out;
-
-		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
-					COREDUMP_KERNEL | COREDUMP_USERSPACE |
-					COREDUMP_REJECT | COREDUMP_WAIT))
-			goto out;
-
-		if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0))
-			goto out;
-
-		if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED))
-			goto out;
-
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_peer_pidfd >= 0)
-			close(fd_peer_pidfd);
-		if (fd_coredump >= 0)
-			close(fd_coredump);
-		if (fd_server >= 0)
-			close(fd_server);
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	pid = fork();
-	ASSERT_GE(pid, 0);
-	if (pid == 0)
-		crashing_child();
-
-	pidfd = sys_pidfd_open(pid, 0);
-	ASSERT_GE(pidfd, 0);
-
-	waitpid(pid, &status, 0);
-	ASSERT_TRUE(WIFSIGNALED(status));
-	ASSERT_FALSE(WCOREDUMP(status));
-
-	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
-	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
-	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_request_invalid_size_small)
-{
-	int pidfd, ret, status;
-	pid_t pid, pid_coredump_server;
-	struct pidfd_info info = {};
-	int ipc_sockets[2];
-	char c;
-
-	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
-	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
-	ASSERT_EQ(ret, 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		struct coredump_req req = {};
-		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
-		int exit_code = EXIT_FAILURE;
-
-		close(ipc_sockets[0]);
-
-		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
-		if (fd_server < 0)
-			goto out;
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
-			goto out;
-
-		close(ipc_sockets[1]);
-
-		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
-		if (fd_coredump < 0)
-			goto out;
-
-		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
-		if (fd_peer_pidfd < 0)
-			goto out;
-
-		if (!get_pidfd_info(fd_peer_pidfd, &info))
-			goto out;
-
-		if (!(info.mask & PIDFD_INFO_COREDUMP))
-			goto out;
-
-		if (!(info.coredump_mask & PIDFD_COREDUMPED))
-			goto out;
-
-		if (!read_coredump_req(fd_coredump, &req))
-			goto out;
-
-		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
-					COREDUMP_KERNEL | COREDUMP_USERSPACE |
-					COREDUMP_REJECT | COREDUMP_WAIT))
-			goto out;
-
-		if (!send_coredump_ack(fd_coredump, &req,
-				       COREDUMP_REJECT | COREDUMP_WAIT,
-				       COREDUMP_ACK_SIZE_VER0 / 2))
-			goto out;
-
-		if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE))
-			goto out;
-
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_peer_pidfd >= 0)
-			close(fd_peer_pidfd);
-		if (fd_coredump >= 0)
-			close(fd_coredump);
-		if (fd_server >= 0)
-			close(fd_server);
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	pid = fork();
-	ASSERT_GE(pid, 0);
-	if (pid == 0)
-		crashing_child();
-
-	pidfd = sys_pidfd_open(pid, 0);
-	ASSERT_GE(pidfd, 0);
-
-	waitpid(pid, &status, 0);
-	ASSERT_TRUE(WIFSIGNALED(status));
-	ASSERT_FALSE(WCOREDUMP(status));
-
-	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
-	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
-	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_request_invalid_size_large)
-{
-	int pidfd, ret, status;
-	pid_t pid, pid_coredump_server;
-	struct pidfd_info info = {};
-	int ipc_sockets[2];
-	char c;
-
-	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
-	ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
-	ASSERT_EQ(ret, 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		struct coredump_req req = {};
-		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1;
-		int exit_code = EXIT_FAILURE;
-
-		close(ipc_sockets[0]);
-
-		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
-		if (fd_server < 0)
-			goto out;
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
-			goto out;
-
-		close(ipc_sockets[1]);
-
-		fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
-		if (fd_coredump < 0)
-			goto out;
-
-		fd_peer_pidfd = get_peer_pidfd(fd_coredump);
-		if (fd_peer_pidfd < 0)
-			goto out;
-
-		if (!get_pidfd_info(fd_peer_pidfd, &info))
-			goto out;
-
-		if (!(info.mask & PIDFD_INFO_COREDUMP))
-			goto out;
-
-		if (!(info.coredump_mask & PIDFD_COREDUMPED))
-			goto out;
-
-		if (!read_coredump_req(fd_coredump, &req))
-			goto out;
-
-		if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
-					COREDUMP_KERNEL | COREDUMP_USERSPACE |
-					COREDUMP_REJECT | COREDUMP_WAIT))
-			goto out;
-
-		if (!send_coredump_ack(fd_coredump, &req,
-				       COREDUMP_REJECT | COREDUMP_WAIT,
-				       COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE))
-			goto out;
-
-		if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE))
-			goto out;
-
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_peer_pidfd >= 0)
-			close(fd_peer_pidfd);
-		if (fd_coredump >= 0)
-			close(fd_coredump);
-		if (fd_server >= 0)
-			close(fd_server);
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	pid = fork();
-	ASSERT_GE(pid, 0);
-	if (pid == 0)
-		crashing_child();
-
-	pidfd = sys_pidfd_open(pid, 0);
-	ASSERT_GE(pidfd, 0);
-
-	waitpid(pid, &status, 0);
-	ASSERT_TRUE(WIFSIGNALED(status));
-	ASSERT_FALSE(WCOREDUMP(status));
-
-	ASSERT_TRUE(get_pidfd_info(pidfd, &info));
-	ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
-	ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-static int open_coredump_tmpfile(int fd_tmpfs_detached)
-{
-	return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600);
-}
-
-#define NUM_CRASHING_COREDUMPS 5
-
-TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500)
-{
-	int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
-	pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server;
-	struct pidfd_info info = {};
-	int ipc_sockets[2];
-	char c;
-
-	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-
-	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
-		int exit_code = EXIT_FAILURE;
-		struct coredump_req req = {};
-
-		close(ipc_sockets[0]);
-		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
-		if (fd_server < 0) {
-			fprintf(stderr, "Failed to create and listen on unix socket\n");
-			goto out;
-		}
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
-			fprintf(stderr, "Failed to notify parent via ipc socket\n");
-			goto out;
-		}
-		close(ipc_sockets[1]);
-
-		for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
-			fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
-			if (fd_coredump < 0) {
-				fprintf(stderr, "accept4 failed: %m\n");
-				goto out;
-			}
-
-			fd_peer_pidfd = get_peer_pidfd(fd_coredump);
-			if (fd_peer_pidfd < 0) {
-				fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump);
-				goto out;
-			}
-
-			if (!get_pidfd_info(fd_peer_pidfd, &info)) {
-				fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd);
-				goto out;
-			}
-
-			if (!(info.mask & PIDFD_INFO_COREDUMP)) {
-				fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd);
-				goto out;
-			}
-			if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
-				fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd);
-				goto out;
-			}
-
-			if (!read_coredump_req(fd_coredump, &req)) {
-				fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump);
-				goto out;
-			}
-
-			if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
-						COREDUMP_KERNEL | COREDUMP_USERSPACE |
-						COREDUMP_REJECT | COREDUMP_WAIT)) {
-				fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump);
-				goto out;
-			}
-
-			if (!send_coredump_ack(fd_coredump, &req,
-					       COREDUMP_KERNEL | COREDUMP_WAIT, 0)) {
-				fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump);
-				goto out;
-			}
-
-			if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) {
-				fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump);
-				goto out;
-			}
-
-			fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
-			if (fd_core_file < 0) {
-				fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump);
-				goto out;
-			}
-
-			for (;;) {
-				char buffer[4096];
-				ssize_t bytes_read, bytes_write;
-
-				bytes_read = read(fd_coredump, buffer, sizeof(buffer));
-				if (bytes_read < 0) {
-					fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump);
-					goto out;
-				}
-
-				if (bytes_read == 0)
-					break;
-
-				bytes_write = write(fd_core_file, buffer, bytes_read);
-				if (bytes_read != bytes_write) {
-					fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file);
-					goto out;
-				}
-			}
-
-			close(fd_core_file);
-			close(fd_peer_pidfd);
-			close(fd_coredump);
-			fd_peer_pidfd = -1;
-			fd_coredump = -1;
-		}
-
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_core_file >= 0)
-			close(fd_core_file);
-		if (fd_peer_pidfd >= 0)
-			close(fd_peer_pidfd);
-		if (fd_coredump >= 0)
-			close(fd_coredump);
-		if (fd_server >= 0)
-			close(fd_server);
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
-		pid[i] = fork();
-		ASSERT_GE(pid[i], 0);
-		if (pid[i] == 0)
-			crashing_child();
-		pidfd[i] = sys_pidfd_open(pid[i], 0);
-		ASSERT_GE(pidfd[i], 0);
-	}
-
-	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
-		waitpid(pid[i], &status[i], 0);
-		ASSERT_TRUE(WIFSIGNALED(status[i]));
-		ASSERT_TRUE(WCOREDUMP(status[i]));
-	}
-
-	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
-		info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
-		ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
-		ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
-		ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-	}
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-#define MAX_EVENTS 128
-
-static void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file)
-{
-	int epfd = -1;
-	int exit_code = EXIT_FAILURE;
-
-	epfd = epoll_create1(0);
-	if (epfd < 0)
-		goto out;
-
-	struct epoll_event ev;
-	ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET;
-	ev.data.fd = fd_coredump;
-	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0)
-		goto out;
-
-	for (;;) {
-		struct epoll_event events[1];
-		int n = epoll_wait(epfd, events, 1, -1);
-		if (n < 0)
-			break;
-
-		if (events[0].events & (EPOLLIN | EPOLLRDHUP)) {
-			for (;;) {
-				char buffer[4096];
-				ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer));
-				if (bytes_read < 0) {
-					if (errno == EAGAIN || errno == EWOULDBLOCK)
-						break;
-					goto out;
-				}
-				if (bytes_read == 0)
-					goto done;
-				ssize_t bytes_write = write(fd_core_file, buffer, bytes_read);
-				if (bytes_write != bytes_read)
-					goto out;
-			}
-		}
-	}
-
-done:
-	exit_code = EXIT_SUCCESS;
-out:
-	if (epfd >= 0)
-		close(epfd);
-	if (fd_core_file >= 0)
-		close(fd_core_file);
-	if (fd_peer_pidfd >= 0)
-		close(fd_peer_pidfd);
-	if (fd_coredump >= 0)
-		close(fd_coredump);
-	_exit(exit_code);
-}
-
-TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500)
-{
-	int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS];
-	pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS];
-	struct pidfd_info info = {};
-	int ipc_sockets[2];
-	char c;
-
-	ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket"));
-	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
-
-	pid_coredump_server = fork();
-	ASSERT_GE(pid_coredump_server, 0);
-	if (pid_coredump_server == 0) {
-		int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0;
-		fd_server = -1;
-		exit_code = EXIT_FAILURE;
-		n_conns = 0;
-		close(ipc_sockets[0]);
-		fd_server = create_and_listen_unix_socket("/tmp/coredump.socket");
-		if (fd_server < 0)
-			goto out;
-
-		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
-			goto out;
-		close(ipc_sockets[1]);
-
-		while (n_conns < NUM_CRASHING_COREDUMPS) {
-			int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1;
-			struct coredump_req req = {};
-			fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
-			if (fd_coredump < 0) {
-				if (errno == EAGAIN || errno == EWOULDBLOCK)
-					continue;
-				goto out;
-			}
-			fd_peer_pidfd = get_peer_pidfd(fd_coredump);
-			if (fd_peer_pidfd < 0)
-				goto out;
-			if (!get_pidfd_info(fd_peer_pidfd, &info))
-				goto out;
-			if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED))
-				goto out;
-			if (!read_coredump_req(fd_coredump, &req))
-				goto out;
-			if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0,
-						COREDUMP_KERNEL | COREDUMP_USERSPACE |
-						COREDUMP_REJECT | COREDUMP_WAIT))
-				goto out;
-			if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0))
-				goto out;
-			if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK))
-				goto out;
-			fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached);
-			if (fd_core_file < 0)
-				goto out;
-			pid_t worker = fork();
-			if (worker == 0) {
-				close(fd_server);
-				process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file);
-			}
-			worker_pids[n_conns] = worker;
-			if (fd_coredump >= 0)
-				close(fd_coredump);
-			if (fd_peer_pidfd >= 0)
-				close(fd_peer_pidfd);
-			if (fd_core_file >= 0)
-				close(fd_core_file);
-			n_conns++;
-		}
-		exit_code = EXIT_SUCCESS;
-out:
-		if (fd_server >= 0)
-			close(fd_server);
-
-		// Reap all worker processes
-		for (int i = 0; i < n_conns; i++) {
-			int wstatus;
-			if (waitpid(worker_pids[i], &wstatus, 0) < 0) {
-				fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]);
-			} else if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) {
-				fprintf(stderr, "Worker %d exited with error code %d\n", worker_pids[i], WEXITSTATUS(wstatus));
-				exit_code = EXIT_FAILURE;
-			}
-		}
-
-		_exit(exit_code);
-	}
-	self->pid_coredump_server = pid_coredump_server;
-
-	EXPECT_EQ(close(ipc_sockets[1]), 0);
-	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
-	EXPECT_EQ(close(ipc_sockets[0]), 0);
-
-	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
-		pid[i] = fork();
-		ASSERT_GE(pid[i], 0);
-		if (pid[i] == 0)
-			crashing_child();
-		pidfd[i] = sys_pidfd_open(pid[i], 0);
-		ASSERT_GE(pidfd[i], 0);
-	}
-
-	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
-		ASSERT_GE(waitpid(pid[i], &status[i], 0), 0);
-		ASSERT_TRUE(WIFSIGNALED(status[i]));
-		ASSERT_TRUE(WCOREDUMP(status[i]));
-	}
-
-	for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) {
-		info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
-		ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0);
-		ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
-		ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
-	}
-
-	wait_and_check_coredump_server(pid_coredump_server, _metadata, self);
-}
-
-TEST_F(coredump, socket_invalid_paths)
-{
-	ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket"));
-	ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket"));
-	ASSERT_FALSE(set_core_pattern("@../coredump.socket"));
-	ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/.."));
-	ASSERT_FALSE(set_core_pattern("@.."));
-
-	ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket"));
-	ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket"));
-	ASSERT_FALSE(set_core_pattern("@@../coredump.socket"));
-	ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/.."));
-	ASSERT_FALSE(set_core_pattern("@@.."));
-
-	ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket"));
-}
-
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
index b12f1f9babf8..b925756373ce 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -118,7 +118,7 @@ int main(int argc, char **argv)
 	}
 
 	printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n",
-			threads, seconds, node, dir[directions], granule);
+			threads, seconds, node, directions[dir], granule);
 	printf("average map latency(us):%.1f standard deviation:%.1f\n",
 			map.avg_map_100ns/10.0, map.map_stddev/10.0);
 	printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore
index 585ecb4d5dc4..3633c7a3ed65 100644
--- a/tools/testing/selftests/drivers/net/.gitignore
+++ b/tools/testing/selftests/drivers/net/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
+gro
 napi_id_helper
 psp_responder
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 6e41635bd55a..f5c71d993750 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -6,10 +6,12 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \
 		 ../../net/lib.sh \
 
 TEST_GEN_FILES := \
+	gro \
 	napi_id_helper \
 # end of TEST_GEN_FILES
 
 TEST_PROGS := \
+	gro.py \
 	hds.py \
 	napi_id.py \
 	napi_threaded.py \
@@ -18,10 +20,12 @@ TEST_PROGS := \
 	netcons_fragmented_msg.sh \
 	netcons_overflow.sh \
 	netcons_sysdata.sh \
+	netcons_torture.sh \
 	netpoll_basic.py \
 	ping.py \
 	psp.py \
 	queues.py \
+	ring_reconfig.py \
 	shaper.py \
 	stats.py \
 	xdp.py \
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 402d4ee84f2e..6c5c60adb5e8 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -14,6 +14,7 @@ TEST_PROGS := \
 	dev_addr_lists.sh \
 	mode-1-recovery-updelay.sh \
 	mode-2-recovery-updelay.sh \
+	netcons_over_bonding.sh \
 # end of TEST_PROGS
 
 TEST_FILES := \
@@ -24,6 +25,7 @@ TEST_FILES := \
 
 TEST_INCLUDES := \
 	../../../net/lib.sh \
+	../lib/sh/lib_netcons.sh \
 	../../../net/forwarding/lib.sh \
 # end of TEST_INCLUDES
 
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
index c4711272fe45..559f300f965a 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
@@ -30,6 +30,7 @@ check_connection()
 	local message=${3}
 	RET=0
 
+	sleep 0.25
 	ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
 	check_err $? "ping failed"
 	log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}"
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
index 6bb290abd48b..991494376223 100644
--- a/tools/testing/selftests/drivers/net/bonding/config
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -1,5 +1,6 @@
 CONFIG_BONDING=y
 CONFIG_BRIDGE=y
+CONFIG_CONFIGFS_FS=y
 CONFIG_DUMMY=y
 CONFIG_INET_ESP=y
 CONFIG_INET_ESP_OFFLOAD=y
@@ -9,6 +10,9 @@ CONFIG_MACVLAN=y
 CONFIG_NET_ACT_GACT=y
 CONFIG_NET_CLS_FLOWER=y
 CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
 CONFIG_NETDEVSIM=m
 CONFIG_NET_SCH_INGRESS=y
 CONFIG_NLMON=y
diff --git a/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh
new file mode 100755
index 000000000000..477cc9379500
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh
@@ -0,0 +1,361 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This selftest exercises trying to have multiple netpoll users at the same
+# time.
+#
+# This selftest has multiple small tests inside, and the goal is to
+# set up interfaces with bonding and netconsole in different orders in
+# order to catch any possible issue.
+#
+# The main test consists of four interfaces created using netdevsim; two
+# of them are bonded to serve as the netconsole's transmit interface. The
+# remaining two interfaces are similarly bonded and assigned to a separate
+# network namespace, which acts as the receive interface, where socat monitors
+# for incoming messages.
+#
+# A netconsole message is then sent to ensure it is properly received across
+# this configuration.
+#
+# Later, run a few other tests, to make sure that bonding and netconsole
+# cannot coexist.
+#
+# The test's objective is to exercise netpoll usage when managed simultaneously
+# by multiple subsystems (netconsole and bonding).
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+modprobe bonding 2> /dev/null || true
+modprobe veth 2> /dev/null || true
+
+# The content of kmsg will be saved to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup_bond EXIT
+
+FORMAT="extended"
+IP_VERSION="ipv4"
+VETH0="veth"$(( RANDOM % 256))
+VETH1="veth"$((256 +  RANDOM % 256))
+TXNS=""
+RXNS=""
+
+# Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with
+# the bonding interfaces
+function setup_bonding_ifaces() {
+	local RAND=$(( RANDOM % 100 ))
+	BOND_TX_MAIN_IF="bond_tx_$RAND"
+	BOND_RX_MAIN_IF="bond_rx_$RAND"
+
+	# Setup TX
+	if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr
+	then
+		echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2
+		# only clean nsim ifaces and namespace. Nothing else has been
+		# initialized
+		cleanup_bond_nsim
+		trap - EXIT
+		exit "${ksft_skip}"
+	fi
+
+	# create_netdevsim() got the interface up, but it needs to be down
+	# before being enslaved.
+	ip -n "${TXNS}" \
+		link set "${BOND_TX1_SLAVE_IF}" down
+	ip -n "${TXNS}" \
+		link set "${BOND_TX2_SLAVE_IF}" down
+	ip -n "${TXNS}" \
+		link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+	ip -n "${TXNS}" \
+		link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+	ip -n "${TXNS}" \
+		link set "${BOND_TX_MAIN_IF}" up
+
+	# Setup RX
+	ip -n "${RXNS}" \
+		link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr
+	ip -n "${RXNS}" \
+		link set "${BOND_RX1_SLAVE_IF}" down
+	ip -n "${RXNS}" \
+		link set "${BOND_RX2_SLAVE_IF}" down
+	ip -n "${RXNS}" \
+		link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}"
+	ip -n "${RXNS}" \
+		link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}"
+	ip -n "${RXNS}" \
+		link set "${BOND_RX_MAIN_IF}" up
+
+	export DSTIF="${BOND_RX_MAIN_IF}"
+	export SRCIF="${BOND_TX_MAIN_IF}"
+}
+
+# Create 4 netdevsim interfaces. Two of them will be enslaved to the TX bonding
+# iface and the other two to the RX bonding iface (in the other namespace)
+function create_ifaces_bond() {
+	BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}")
+	BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}")
+	BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}")
+	BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}")
+}
+
+# netdevsim link BOND_TX to BOND_RX interfaces
+function link_ifaces_bond() {
+	local BOND_TX1_SLAVE_IFIDX
+	local BOND_TX2_SLAVE_IFIDX
+	local BOND_RX1_SLAVE_IFIDX
+	local BOND_RX2_SLAVE_IFIDX
+	local TXNS_FD
+	local RXNS_FD
+
+	BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \
+				cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex)
+	BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \
+				cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex)
+	BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \
+				cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex)
+	BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \
+				cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex)
+
+	exec {TXNS_FD}</var/run/netns/"${TXNS}"
+	exec {RXNS_FD}</var/run/netns/"${RXNS}"
+
+	# Linking TX ifaces to the RX ones (on the other namespace)
+	echo "${TXNS_FD}:$BOND_TX1_SLAVE_IFIDX $RXNS_FD:$BOND_RX1_SLAVE_IFIDX"  \
+		> "$NSIM_DEV_SYS_LINK"
+	echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX"  \
+		> "$NSIM_DEV_SYS_LINK"
+
+	exec {TXNS_FD}<&-
+	exec {RXNS_FD}<&-
+}
+
+function create_all_ifaces() {
+	# setup_ns function is coming from lib.sh
+	setup_ns TXNS RXNS
+	export NAMESPACE="${RXNS}"
+
+	# Create two interfaces for RX and two for TX
+	create_ifaces_bond
+	# Link netdevsim ifaces
+	link_ifaces_bond
+}
+
+# configure DSTIF and SRCIF IPs
+function configure_ifaces_ips() {
+	local IP_VERSION=${1:-"ipv4"}
+	select_ipv4_or_ipv6 "${IP_VERSION}"
+
+	ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}"
+	ip -n "${RXNS}" link set "${DSTIF}" up
+
+	ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}"
+	ip -n "${TXNS}" link set "${SRCIF}" up
+}
+
+function test_enable_netpoll_on_enslaved_iface() {
+	echo 0 > "${NETCONS_PATH}"/enabled
+
+	# At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and
+	# linked to BOND_RX1_SLAVE_IF inside the namespace.
+	echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name
+
+	# This should fail with the following message in dmesg:
+	# netpoll: netconsole: ethX is a slave device, aborting
+	set +e
+	enable_netcons_ns 2> /dev/null
+	set -e
+
+	if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]]
+	then
+		echo "test failed: Bonding and netpoll cannot co-exists." >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+function test_delete_bond_and_reenable_target() {
+	ip -n "${TXNS}" \
+		link delete "${BOND_TX_MAIN_IF}" type bond
+
+	# BOND_TX1_SLAVE_IF is not attached to a bond interface anymore
+	# netpoll can be plugged in there
+	echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name
+
+	# this should work, since the interface is not enslaved
+	enable_netcons_ns
+
+	if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+	then
+		echo "test failed: Unable to start netpoll on an unbond iface." >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+# Send a netconsole message to the netconsole target
+function test_send_netcons_msg_through_bond_iface() {
+	# Listen for netconsole port inside the namespace and
+	# destination interface
+	listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" &
+	# Wait for socat to start and listen to the port.
+	wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}"
+	# Send the message
+	echo "${MSG}: ${TARGET}" > /dev/kmsg
+	# Wait until socat saves the file to disk
+	busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+	# Make sure the message was received in the dst part
+	# and exit
+	validate_result "${OUTPUT_FILE}" "${FORMAT}"
+	# kill socat in case it is still running
+	pkill_socat
+}
+
+# BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF.
+# Given BOND_TX_MAIN_IF was deleted, recreate it first
+function test_enslave_netcons_enabled_iface {
+	# netconsole must still be enabled on BOND_TX1_SLAVE_IF (from the previous test)
+	if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+	then
+		echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2
+		exit "${ksft_fail}"
+	fi
+
+	# recreate the bonding iface. it got deleted by previous
+	# test (test_delete_bond_and_reenable_target)
+	ip -n "${TXNS}" \
+		link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr
+
+	# The sub-interface needs to be down before attaching it to the bond.
+	# This will also disable netconsole.
+	ip -n "${TXNS}" \
+		link set "${BOND_TX1_SLAVE_IF}" down
+	ip -n "${TXNS}" \
+		link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+	ip -n "${TXNS}" \
+		link set "${BOND_TX_MAIN_IF}" up
+
+	# netconsole got disabled while the interface was down
+	if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]]
+	then
+		echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+# Get netconsole enabled on a bonding interface and attach a second
+# sub-interface.
+function test_enslave_iface_to_bond {
+	# BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now
+	echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name
+	enable_netcons_ns
+
+	# netcons is attached to bond0 and BOND_TX1_SLAVE_IF is
+	# part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF.
+	ip -n "${TXNS}" \
+		link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}"
+	if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]]
+	then
+		echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+function test_enslave_iff_disabled_netpoll_iface {
+	local ret
+
+	# Create a veth pair; veth is known to have IFF_DISABLE_NETPOLL set.
+	# NOTE(review): no "-n TXNS" here although the bond is managed there; confirm
+	if ! ip link add "${VETH0}" type veth peer name "${VETH1}"
+	then
+		echo "Failed to create veth TX interface. Is CONFIG_VETH set?" >&2
+		exit "${ksft_skip}"
+	fi
+	set +e
+	# Expected to fail; prints "RTNETLINK answers: Device or resource busy"
+	ip link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null
+	ret=$?
+	set -e
+	if [[ $ret -eq 0 ]]
+	then
+		echo "test failed: veth interface should not have been enslaved" >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+# Given that netconsole picks the current net namespace, we need to enable it
+# from inside the TXNS namespace
+function enable_netcons_ns() {
+	ip netns exec "${TXNS}" sh -c \
+		"mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled"
+}
+
+####################
+# Tests start here #
+####################
+
+# Create regular interfaces using netdevsim and link them
+create_all_ifaces
+
+# Setup the bonding interfaces
+# BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF
+# BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF
+setup_bonding_ifaces
+
+# Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF
+configure_ifaces_ips "${IP_VERSION}"
+
+_create_dynamic_target "${FORMAT}" "${NETCONS_PATH}"
+enable_netcons_ns
+set_user_data
+
+# Test #1: Create a bonding interface and attach netpoll to
+# the bonding interface. Netconsole/netpoll should work on
+# the bonding interface.
+test_send_netcons_msg_through_bond_iface
+echo "test #1: netpoll on bonding interface worked. Test passed" >&2
+
+# Test #2: Attach netpoll to an enslaved interface
+# Try to attach netpoll to an enslaved sub-interface (while still being part of
+# a bonding interface), which shouldn't be allowed
+test_enable_netpoll_on_enslaved_iface
+echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." >&2
+
+# Test #3: Unplug the sub-interface from bond and enable netconsole
+# Detach the interface from a bonding interface and attach netpoll again
+test_delete_bond_and_reenable_target
+echo "test #3: Able to attach to an unbound interface. Test passed." >&2
+
+# Test #4: Enslave a sub-interface that had netconsole enabled
+# Try to enslave an interface that has netconsole/netpoll enabled.
+# Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it
+test_enslave_netcons_enabled_iface
+echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2
+
+# Test #5: Enslave a sub-interface to a bonding interface
+# Enslave an interface to a bond interface that has netpoll attached
+# At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of
+# it. Netconsole is currently disabled
+test_enslave_iface_to_bond
+echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2
+
+# Test #6: Enslave an IFF_DISABLE_NETPOLL sub-interface to a bonding interface
+# At this stage, BOND_TX_MAIN_IF has both sub-interfaces and netconsole is
+# enabled. This test will try to enslave a veth (IFF_DISABLE_NETPOLL) interface
+# and it should fail, with netpoll: veth0 doesn't support polling
+test_enslave_iff_disabled_netpoll_iface
+echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2
+
+cleanup_bond
+trap - EXIT
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/drivers/net/gro.c
index 2b1d9f2b3e9e..995b492f5bcb 100644
--- a/tools/testing/selftests/net/gro.c
+++ b/tools/testing/selftests/drivers/net/gro.c
@@ -57,7 +57,8 @@
 #include <string.h>
 #include <unistd.h>
 
-#include "../kselftest.h"
+#include "../../kselftest.h"
+#include "../../net/lib/ksft.h"
 
 #define DPORT 8000
 #define SPORT 1500
@@ -754,11 +755,11 @@ static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1,
 	static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
 
 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
-	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1);
+	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1);
 	write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
 
 	create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
-	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2);
+	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2);
 	write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
 }
 
@@ -989,6 +990,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
 
 static void gro_sender(void)
 {
+	const int fin_delay_us = 100 * 1000;
 	static char fin_pkt[MAX_HDR_LEN];
 	struct sockaddr_ll daddr = {};
 	int txfd = -1;
@@ -1032,15 +1034,22 @@ static void gro_sender(void)
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
 	} else if (strcmp(testname, "tcp") == 0) {
 		send_changed_checksum(txfd, &daddr);
+		/* Adding sleep before sending FIN so that it is not
+		 * received prior to other packets.
+		 */
+		usleep(fin_delay_us);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
 
 		send_changed_seq(txfd, &daddr);
+		usleep(fin_delay_us);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
 
 		send_changed_ts(txfd, &daddr);
+		usleep(fin_delay_us);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
 
 		send_diff_opt(txfd, &daddr);
+		usleep(fin_delay_us);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
 	} else if (strcmp(testname, "ip") == 0) {
 		send_changed_ECN(txfd, &daddr);
@@ -1119,6 +1128,8 @@ static void gro_receiver(void)
 	set_timeout(rxfd);
 	bind_packetsocket(rxfd);
 
+	ksft_ready();
+
 	memset(correct_payload, 0, sizeof(correct_payload));
 
 	if (strcmp(testname, "data") == 0) {
diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py
new file mode 100755
index 000000000000..ba83713bf7b5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/gro.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+GRO (Generic Receive Offload) conformance tests.
+
+Validates that GRO coalescing works correctly by running the gro
+binary in different configurations and checking for correct packet
+coalescing behavior.
+
+Test cases:
+  - data: Data packets with same size/headers and correct seq numbers coalesce
+  - ack: Pure ACK packets do not coalesce
+  - flags: Packets with PSH, SYN, URG, RST flags do not coalesce
+  - tcp: Packets with incorrect checksum, non-consecutive seqno don't coalesce
+  - ip: Packets with different ECN, TTL, TOS, or IP options don't coalesce
+  - large: Packets larger than GRO_MAX_SIZE don't coalesce
+"""
+
+import os
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import NetDrvEpEnv, KsftXfailEx
+from lib.py import cmd, defer, bkg, ip
+from lib.py import ksft_variants
+
+
+def _resolve_dmac(cfg, ipver):
+    """
+    Find the destination MAC address remote host should use to send packets
+    towards the local host. It may be a router / gateway address.
+    """
+
+    attr = "dmac" + ipver
+    # Cache the response across test cases
+    if hasattr(cfg, attr):
+        return getattr(cfg, attr)
+
+    route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}",
+               json=True, host=cfg.remote)[0]
+    gw = route.get("gateway")
+    # Local L2 segment, address directly
+    if not gw:
+        setattr(cfg, attr, cfg.dev['address'])
+        return getattr(cfg, attr)
+
+    # ping to make sure neighbor is resolved,
+    # bind to an interface, for v6 the GW is likely link local
+    cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote)
+
+    neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}",
+               json=True, host=cfg.remote)[0]
+    setattr(cfg, attr, neigh['lladdr'])
+    return getattr(cfg, attr)
+
+
+def _write_defer_restore(cfg, path, val, defer_undo=False):
+    with open(path, "r", encoding="utf-8") as fp:
+        orig_val = fp.read().strip()
+        if str(val) == orig_val:
+            return
+    with open(path, "w", encoding="utf-8") as fp:
+        fp.write(val)
+    if defer_undo:
+        defer(_write_defer_restore, cfg, path, orig_val)
+
+
+def _set_mtu_restore(dev, mtu, host):
+    if dev['mtu'] < mtu:
+        ip(f"link set dev {dev['ifname']} mtu {mtu}", host=host)
+        defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host)
+
+
+def _setup(cfg, test_name):
+    """ Deploy the gro binary and tune MTU, GRO flush deferral and offloads. """
+
+    if not hasattr(cfg, "bin_remote"):
+        cfg.bin_local = cfg.test_dir / "gro"
+        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+    # "large" test needs at least 4k MTU
+    if test_name == "large":
+        _set_mtu_restore(cfg.dev, 4096, None)
+        _set_mtu_restore(cfg.remote_dev, 4096, cfg.remote)
+
+    flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout"
+    irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs"
+
+    _write_defer_restore(cfg, flush_path, "200000", defer_undo=True)
+    _write_defer_restore(cfg, irq_path, "10", defer_undo=True)
+
+    try:
+        # Disable TSO for local tests
+        cfg.require_nsim()  # will raise KsftXfailEx if not running on nsim
+
+        cmd(f"ethtool -K {cfg.ifname} gro on tso off")
+        cmd(f"ethtool -K {cfg.remote_ifname} gro on tso off", host=cfg.remote)
+    except KsftXfailEx:
+        pass
+
+def _gro_variants():
+    """Generator that yields all combinations of protocol and test types."""
+
+    for protocol in ["ipv4", "ipv6", "ipip"]:
+        for test_name in ["data", "ack", "flags", "tcp", "ip", "large"]:
+            yield protocol, test_name
+
+
+@ksft_variants(_gro_variants())
+def test(cfg, protocol, test_name):
+    """Run a single GRO test with retries."""
+
+    ipver = "6" if protocol[-1] == "6" else "4"
+    cfg.require_ipver(ipver)
+
+    _setup(cfg, test_name)
+
+    base_cmd_args = [
+        f"--{protocol}",
+        f"--dmac {_resolve_dmac(cfg, ipver)}",
+        f"--smac {cfg.remote_dev['address']}",
+        f"--daddr {cfg.addr_v[ipver]}",
+        f"--saddr {cfg.remote_addr_v[ipver]}",
+        f"--test {test_name}",
+        "--verbose"
+    ]
+    base_args = " ".join(base_cmd_args)
+
+    # Each test is tried up to 6 times to deflake, because given the receive
+    # timing, not all packets that should coalesce will be considered in the
+    # same flow on every try.
+    max_retries = 6
+    for attempt in range(max_retries):
+        rx_cmd = f"{cfg.bin_local} {base_args} --rx --iface {cfg.ifname}"
+        tx_cmd = f"{cfg.bin_remote} {base_args} --iface {cfg.remote_ifname}"
+
+        fail_now = attempt >= max_retries - 1
+
+        with bkg(rx_cmd, ksft_ready=True, exit_wait=True,
+                 fail=fail_now) as rx_proc:
+            cmd(tx_cmd, host=cfg.remote)
+
+        if rx_proc.ret == 0:
+            return
+
+        ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
+        ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+
+        if test_name == "large" and os.environ.get("KSFT_MACHINE_SLOW"):
+            ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment")
+            return
+
+        ksft_pr(f"Attempt {attempt + 1}/{max_retries} failed, retrying...")
+
+
+def main() -> None:
+    """ Ksft boiler plate main """
+
+    with NetDrvEpEnv(__file__) as cfg:
+        ksft_run(cases=[test], args=(cfg,))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore
index 6942bf575497..46540468a775 100644
--- a/tools/testing/selftests/drivers/net/hw/.gitignore
+++ b/tools/testing/selftests/drivers/net/hw/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 iou-zcrx
 ncdevmem
+toeplitz
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 8133d1a0051c..9c163ba6feee 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -1,10 +1,26 @@
 # SPDX-License-Identifier: GPL-2.0+ OR MIT
 
-TEST_GEN_FILES = iou-zcrx
+# Check if io_uring supports zero-copy receive
+HAS_IOURING_ZCRX := $(shell \
+	echo -e '#include <liburing.h>\n' \
+	     'void *func = (void *)io_uring_register_ifq;\n' \
+	     'int main() {return 0;}' | \
+	$(CC) -luring -x c - -o /dev/null 2>&1 && echo y)
+
+ifeq ($(HAS_IOURING_ZCRX),y)
+COND_GEN_FILES += iou-zcrx
+else
+$(warning excluding iouring tests, liburing not installed or too old)
+endif
+
+TEST_GEN_FILES := \
+	$(COND_GEN_FILES) \
+# end of TEST_GEN_FILES
 
 TEST_PROGS = \
 	csum.py \
 	devlink_port_split.py \
+	devlink_rate_tc_bw.py \
 	devmem.py \
 	ethtool.sh \
 	ethtool_extended_state.sh \
@@ -21,6 +37,7 @@ TEST_PROGS = \
 	rss_ctx.py \
 	rss_flow_label.py \
 	rss_input_xfrm.py \
+	toeplitz.py \
 	tso.py \
 	xsk_reconfig.py \
 	#
@@ -38,7 +55,10 @@ TEST_INCLUDES := \
 	#
 
 # YNL files, must be before "include ..lib.mk"
-YNL_GEN_FILES := ncdevmem
+YNL_GEN_FILES := \
+	ncdevmem \
+	toeplitz \
+# end of YNL_GEN_FILES
 TEST_GEN_FILES += $(YNL_GEN_FILES)
 TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
 
@@ -54,4 +74,6 @@ include ../../../net/ynl.mk
 
 include ../../../net/bpf.mk
 
+ifeq ($(HAS_IOURING_ZCRX),y)
 $(OUTPUT)/iou-zcrx: LDLIBS += -luring
+endif
diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
index ead6784d1910..4e4faa9275bb 100755
--- a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
+++ b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py
@@ -21,21 +21,21 @@ Test Cases:
 ----------
 1. test_no_tc_mapping_bandwidth:
    - Verifies that without TC mapping, bandwidth is NOT distributed according to
-     the configured 80/20 split between TC4 and TC3
-   - This test should fail if bandwidth matches the 80/20 split without TC
+     the configured 20/80 split between TC3 and TC4
+   - This test should fail if bandwidth matches the 20/80 split without TC
      mapping
-   - Expected: Bandwidth should NOT be distributed as 80/20
+   - Expected: Bandwidth should NOT be distributed as 20/80
 
 2. test_tc_mapping_bandwidth:
    - Configures TC mapping using mqprio qdisc
    - Verifies that with TC mapping, bandwidth IS distributed according to the
-     configured 80/20 split between TC3 and TC4
-   - Expected: Bandwidth should be distributed as 80/20
+     configured 20/80 split between TC3 and TC4
+   - Expected: Bandwidth should be distributed as 20/80
 
 Bandwidth Distribution:
 ----------------------
-- TC3 (VLAN 101): Configured for 80% of total bandwidth
-- TC4 (VLAN 102): Configured for 20% of total bandwidth
+- TC3 (VLAN 101): Configured for 20% of total bandwidth
+- TC4 (VLAN 102): Configured for 80% of total bandwidth
 - Total bandwidth: 1Gbps
 - Tolerance: +-12%
 
@@ -64,43 +64,40 @@ from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
 from lib.py import NetDrvEpEnv, DevlinkFamily
 from lib.py import NlError
 from lib.py import cmd, defer, ethtool, ip
+from lib.py import Iperf3Runner
 
 
 class BandwidthValidator:
     """
-    Validates bandwidth totals and per-TC shares against expected values
-    with a tolerance.
+    Validates total bandwidth and individual shares with tolerance
+    relative to the overall total.
     """
 
-    def __init__(self):
+    def __init__(self, shares):
         self.tolerance_percent = 12
-        self.expected_total_gbps = 1.0
-        self.total_min_expected = self.min_expected(self.expected_total_gbps)
-        self.total_max_expected = self.max_expected(self.expected_total_gbps)
-        self.tc_expected_percent = {
-            3: 20.0,
-            4: 80.0,
-        }
+        self.expected_total = sum(shares.values())
+        self.bounds = {}
+
+        for name, exp in shares.items():
+            self.bounds[name] = (self.min_expected(exp), self.max_expected(exp))
 
     def min_expected(self, value):
         """Calculates the minimum acceptable value based on tolerance."""
-        return value - (value * self.tolerance_percent / 100)
+        return value - (self.expected_total * self.tolerance_percent / 100)
 
     def max_expected(self, value):
         """Calculates the maximum acceptable value based on tolerance."""
-        return value + (value * self.tolerance_percent / 100)
-
-    def bound(self, expected, value):
-        """Returns True if value is within expected tolerance."""
-        return self.min_expected(expected) <= value <= self.max_expected(expected)
+        return value + (self.expected_total * self.tolerance_percent / 100)
 
-    def tc_bandwidth_bound(self, value, tc_ix):
+    def bound(self, values):
         """
-        Returns True if the given bandwidth value is within tolerance
-        for the TC's expected bandwidth.
+        Return True if all given values fall within tolerance.
         """
-        expected = self.tc_expected_percent[tc_ix]
-        return self.bound(expected, value)
+        for name, value in values.items():
+            low, high = self.bounds[name]
+            if not low <= value <= high:
+                return False
+        return True
 
 
 def setup_vf(cfg, set_tc_mapping=True):
@@ -116,8 +113,8 @@ def setup_vf(cfg, set_tc_mapping=True):
     except Exception as exc:
         raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc
     try:
-        cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs")
-        defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs")
+        cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
+        defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs", shell=True)
     except Exception as exc:
         raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc
 
@@ -139,8 +136,8 @@ def setup_vlans_on_vf(vf_ifc):
     Sets up two VLAN interfaces on the given VF, each mapped to a different TC.
     """
     vlan_configs = [
-        {"vlan_id": 101, "tc": 3, "ip": "198.51.100.2"},
-        {"vlan_id": 102, "tc": 4, "ip": "198.51.100.10"},
+        {"vlan_id": 101, "tc": 3, "ip": "198.51.100.1"},
+        {"vlan_id": 102, "tc": 4, "ip": "198.51.100.9"},
     ]
 
     for config in vlan_configs:
@@ -224,13 +221,13 @@ def setup_devlink_rate(cfg):
         raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc
 
 
-def setup_remote_server(cfg):
+def setup_remote_vlans(cfg):
     """
-    Sets up VLAN interfaces and starts iperf3 servers on the remote side.
+    Sets up VLAN interfaces on the remote side.
     """
     remote_dev = cfg.remote_ifname
     vlan_ids = [101, 102]
-    remote_ips = ["198.51.100.1", "198.51.100.9"]
+    remote_ips = ["198.51.100.2", "198.51.100.10"]
 
     for vlan_id, ip_addr in zip(vlan_ids, remote_ips):
         vlan_dev = f"{remote_dev}.{vlan_id}"
@@ -238,14 +235,13 @@ def setup_remote_server(cfg):
             f"type vlan id {vlan_id}", host=cfg.remote)
         cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote)
         cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote)
-        cmd(f"iperf3 -s -1 -B {ip_addr}",background=True, host=cfg.remote)
         defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote)
 
 
 def setup_test_environment(cfg, set_tc_mapping=True):
     """
     Sets up the complete test environment including VF creation, VLANs,
-    bridge configuration, devlink rate setup, and the remote server.
+    bridge configuration and devlink rate setup.
     """
     vf_ifc = setup_vf(cfg, set_tc_mapping)
     ksft_pr(f"Created VF interface: {vf_ifc}")
@@ -256,51 +252,39 @@ def setup_test_environment(cfg, set_tc_mapping=True):
     setup_bridge(cfg)
 
     setup_devlink_rate(cfg)
-    setup_remote_server(cfg)
-    time.sleep(2)
+    setup_remote_vlans(cfg)
 
 
-def run_iperf_client(server_ip, local_ip, barrier, min_expected_gbps=0.1):
+def measure_bandwidth(cfg, server_ip, client_ip, barrier):
     """
-    Runs a single iperf3 client instance, binding to the given local IP.
-    Waits on a barrier to synchronize with other threads.
+    Synchronizes with peers and runs an iperf3-based bandwidth measurement
+    between the given endpoints. Returns average Gbps.
     """
+    runner = Iperf3Runner(cfg, server_ip=server_ip, client_ip=client_ip)
     try:
         barrier.wait(timeout=10)
     except Exception as exc:
         raise KsftFailEx("iperf3 barrier wait timed") from exc
 
-    iperf_cmd = ["iperf3", "-c", server_ip, "-B", local_ip, "-J"]
-    result = subprocess.run(iperf_cmd, capture_output=True, text=True,
-                            check=True)
-
     try:
-        output = json.loads(result.stdout)
-        bits_per_second = output["end"]["sum_received"]["bits_per_second"]
-        gbps = bits_per_second / 1e9
-        if gbps < min_expected_gbps:
-            ksft_pr(
-                f"iperf3 bandwidth too low: {gbps:.2f} Gbps "
-                f"(expected ≥ {min_expected_gbps} Gbps)"
-            )
-            return None
-        return gbps
-    except json.JSONDecodeError as exc:
-        ksft_pr(f"Failed to parse iperf3 JSON output: {exc}")
-        return None
+        bw_gbps = runner.measure_bandwidth(reverse=True)
+    except Exception as exc:
+        raise KsftFailEx("iperf3 bandwidth measurement failed") from exc
+
+    return bw_gbps
 
 
-def run_bandwidth_test():
+def run_bandwidth_test(cfg):
     """
-    Launches iperf3 client threads for each VLAN/TC pair and collects results.
+    Runs parallel bandwidth measurements for each VLAN/TC pair and collects results.
     """
-    def _run_iperf_client_thread(server_ip, local_ip, results, barrier, tc_ix):
-        results[tc_ix] = run_iperf_client(server_ip, local_ip, barrier)
+    def _run_measure_bandwidth_thread(local_ip, remote_ip, results, barrier, tc_ix):
+        results[tc_ix] = measure_bandwidth(cfg, local_ip, remote_ip, barrier)
 
     vf_vlan_data = [
         # (local_ip, remote_ip, TC)
-        ("198.51.100.2",  "198.51.100.1", 3),
-        ("198.51.100.10", "198.51.100.9", 4),
+        ("198.51.100.1",  "198.51.100.2", 3),
+        ("198.51.100.9", "198.51.100.10", 4),
     ]
 
     results = {}
@@ -309,8 +293,8 @@ def run_bandwidth_test():
 
     for local_ip, remote_ip, tc_ix in vf_vlan_data:
         thread = threading.Thread(
-            target=_run_iperf_client_thread,
-            args=(remote_ip, local_ip, results, start_barrier, tc_ix)
+            target=_run_measure_bandwidth_thread,
+            args=(local_ip, remote_ip, results, start_barrier, tc_ix)
         )
         thread.start()
         threads.append(thread)
@@ -320,10 +304,11 @@ def run_bandwidth_test():
 
     for tc_ix, tc_bw in results.items():
         if tc_bw is None:
-            raise KsftFailEx("iperf3 client failed; cannot evaluate bandwidth")
+            raise KsftFailEx("iperf3 failed; cannot evaluate bandwidth")
 
     return results
 
+
 def calculate_bandwidth_percentages(results):
     """
     Calculates the percentage of total bandwidth received by TC3 and TC4.
@@ -364,59 +349,48 @@ def verify_total_bandwidth(bw_data, validator):
     """
     total = bw_data['total_bw']
 
-    if validator.bound(validator.expected_total_gbps, total):
+    if validator.bound({"total": total}):
         return
 
-    if total < validator.total_min_expected:
+    low, high = validator.bounds["total"]
+
+    if total < low:
         raise KsftSkipEx(
             f"Total bandwidth {total:.2f} Gbps < minimum "
-            f"{validator.total_min_expected:.2f} Gbps; "
-            f"parent tx_max ({validator.expected_total_gbps:.1f} G) "
+            f"{low:.2f} Gbps; "
+            f"parent tx_max ({validator.expected_total:.1f} G) "
             f"not reached, cannot validate share"
         )
 
     raise KsftFailEx(
         f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling "
-        f"{validator.total_max_expected:.2f} Gbps "
-        f"(VF tx_max set to {validator.expected_total_gbps:.1f} G)"
+        f"{high:.2f} Gbps "
+        f"(VF tx_max set to {validator.expected_total:.1f} G)"
     )
 
 
-def check_bandwidth_distribution(bw_data, validator):
-    """
-    Checks whether the measured TC3 and TC4 bandwidth percentages
-    fall within their expected tolerance ranges.
-
-    Returns:
-        bool: True if both TC3 and TC4 percentages are within bounds.
-    """
-    tc3_valid = validator.tc_bandwidth_bound(bw_data['tc3_percentage'], 3)
-    tc4_valid = validator.tc_bandwidth_bound(bw_data['tc4_percentage'], 4)
-
-    return tc3_valid and tc4_valid
-
-
 def run_bandwidth_distribution_test(cfg, set_tc_mapping):
     """
-    Runs parallel iperf3 tests for both TCs and collects results.
+    Runs parallel bandwidth measurements for both TCs and collects results.
     """
     setup_test_environment(cfg, set_tc_mapping)
-    bandwidths = run_bandwidth_test()
+    bandwidths = run_bandwidth_test(cfg)
     bw_data = calculate_bandwidth_percentages(bandwidths)
     test_name = "with TC mapping" if set_tc_mapping else "without TC mapping"
     print_bandwidth_results(bw_data, test_name)
 
-    verify_total_bandwidth(bw_data, cfg.bw_validator)
+    verify_total_bandwidth(bw_data, cfg.traffic_bw_validator)
 
-    return check_bandwidth_distribution(bw_data, cfg.bw_validator)
+    return cfg.tc_bw_validator.bound({"tc3": bw_data['tc3_percentage'],
+                                     "tc4": bw_data['tc4_percentage']})
 
 
 def test_no_tc_mapping_bandwidth(cfg):
     """
-    Verifies that bandwidth is not split 80/20 without traffic class mapping.
+    Verifies that bandwidth is not split 20/80 without traffic class mapping.
     """
-    pass_bw_msg = "Bandwidth is NOT distributed as 80/20 without TC mapping"
-    fail_bw_msg = "Bandwidth matched 80/20 split without TC mapping"
+    pass_bw_msg = "Bandwidth is NOT distributed as 20/80 without TC mapping"
+    fail_bw_msg = "Bandwidth matched 20/80 split without TC mapping"
     is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout
 
     if run_bandwidth_distribution_test(cfg, set_tc_mapping=False):
@@ -430,13 +404,13 @@ def test_no_tc_mapping_bandwidth(cfg):
 
 def test_tc_mapping_bandwidth(cfg):
     """
-    Verifies that bandwidth is correctly split 80/20 between TC3 and TC4
+    Verifies that bandwidth is correctly split 20/80 between TC3 and TC4
     when traffic class mapping is set.
     """
     if run_bandwidth_distribution_test(cfg, set_tc_mapping=True):
-        ksft_pr("Bandwidth is distributed as 80/20 with TC mapping")
+        ksft_pr("Bandwidth is distributed as 20/80 with TC mapping")
     else:
-        raise KsftFailEx("Bandwidth did not match 80/20 split with TC mapping")
+        raise KsftFailEx("Bandwidth did not match 20/80 split with TC mapping")
 
 
 def main() -> None:
@@ -451,9 +425,9 @@ def main() -> None:
         )
         if not cfg.pci:
             raise KsftSkipEx("Could not get PCI address of the interface")
-        cfg.require_cmd("iperf3", local=True, remote=True)
 
-        cfg.bw_validator = BandwidthValidator()
+        cfg.traffic_bw_validator = BandwidthValidator({"total": 1})
+        cfg.tc_bw_validator = BandwidthValidator({"tc3": 20, "tc4": 80})
 
         cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth]
 
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index 0ceb297e7757..766bfc4ad842 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -1,5 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0
 
+"""
+Driver test environment (hardware-only tests).
+NetDrvEnv and NetDrvEpEnv are the main environment classes.
+Former is for local host only tests, latter creates / connects
+to a remote endpoint. See NIPA wiki for more information about
+running and writing driver tests.
+"""
+
 import sys
 from pathlib import Path
 
@@ -8,26 +16,37 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
 try:
     sys.path.append(KSFT_DIR.as_posix())
 
-    from net.lib.py import *
-    from drivers.net.lib.py import *
-
     # Import one by one to avoid pylint false positives
+    from net.lib.py import NetNS, NetNSEnter, NetdevSimDev
     from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \
         NlError, RtnlFamily, DevlinkFamily, PSPFamily
     from net.lib.py import CmdExitFailure
-    from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \
-        rand_port, tool, wait_port_listen
-    from net.lib.py import fd_read_timeout
+    from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
+        fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
     from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
     from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
-        ksft_setup
+        ksft_setup, ksft_variants, KsftNamedVariant
     from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
         ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
-    from net.lib.py import NetNSEnter
-    from drivers.net.lib.py import GenerateTraffic
+    from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner
     from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv
+
+    __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev",
+               "EthtoolFamily", "NetdevFamily", "NetshaperFamily",
+               "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
+               "CmdExitFailure",
+               "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
+               "fd_read_timeout", "ip", "rand_port",
+               "wait_port_listen", "wait_file",
+               "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
+               "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
+               "ksft_setup", "ksft_variants", "KsftNamedVariant",
+               "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
+               "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
+               "ksft_not_none", "ksft_not_none",
+               "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+               "Iperf3Runner"]
 except ModuleNotFoundError as e:
-    ksft_pr("Failed importing `net` library from kernel sources")
-    ksft_pr(str(e))
-    ktap_result(True, comment="SKIP")
+    print("Failed importing `net` library from kernel sources")
+    print(str(e))
     sys.exit(4)
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c
index 9ba03164d73a..a4d04438c313 100644
--- a/tools/testing/selftests/net/toeplitz.c
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c
@@ -52,7 +52,11 @@
 #include <sys/types.h>
 #include <unistd.h>
 
-#include "../kselftest.h"
+#include <ynl.h>
+#include "ethtool-user.h"
+
+#include "../../../kselftest.h"
+#include "../../../net/lib/ksft.h"
 
 #define TOEPLITZ_KEY_MIN_LEN	40
 #define TOEPLITZ_KEY_MAX_LEN	60
@@ -64,6 +68,7 @@
 #define FOUR_TUPLE_MAX_LEN	((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
 
 #define RSS_MAX_CPUS (1 << 16)	/* real constraint is PACKET_FANOUT_MAX */
+#define RSS_MAX_INDIR	(1 << 16)
 
 #define RPS_MAX_CPUS 16UL	/* must be a power of 2 */
 
@@ -101,6 +106,8 @@ struct ring_state {
 static unsigned int rx_irq_cpus[RSS_MAX_CPUS];	/* map from rxq to cpu */
 static int rps_silo_to_cpu[RPS_MAX_CPUS];
 static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static unsigned int rss_indir_tbl[RSS_MAX_INDIR];
+static unsigned int rss_indir_tbl_size;
 static struct ring_state rings[RSS_MAX_CPUS];
 
 static inline uint32_t toeplitz(const unsigned char *four_tuple,
@@ -129,7 +136,12 @@ static inline uint32_t toeplitz(const unsigned char *four_tuple,
 /* Compare computed cpu with arrival cpu from packet_fanout_cpu */
 static void verify_rss(uint32_t rx_hash, int cpu)
 {
-	int queue = rx_hash % cfg_num_queues;
+	int queue;
+
+	if (rss_indir_tbl_size)
+		queue = rss_indir_tbl[rx_hash % rss_indir_tbl_size];
+	else
+		queue = rx_hash % cfg_num_queues;
 
 	log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
 	if (rx_irq_cpus[queue] != cpu) {
@@ -482,6 +494,56 @@ static void parse_rps_bitmap(const char *arg)
 			rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
 }
 
+static void read_rss_dev_info_ynl(void)
+{
+	struct ethtool_rss_get_req *req;
+	struct ethtool_rss_get_rsp *rsp;
+	struct ynl_sock *ys;
+
+	ys = ynl_sock_create(&ynl_ethtool_family, NULL);
+	if (!ys)
+		error(1, errno, "ynl_sock_create failed");
+
+	req = ethtool_rss_get_req_alloc();
+	if (!req)
+		error(1, errno, "ethtool_rss_get_req_alloc failed");
+
+	ethtool_rss_get_req_set_header_dev_name(req, cfg_ifname);
+
+	rsp = ethtool_rss_get(ys, req);
+	if (!rsp)
+		error(1, ys->err.code, "YNL: %s", ys->err.msg);
+
+	if (!rsp->_len.hkey)
+		error(1, 0, "RSS key not available for %s", cfg_ifname);
+
+	if (rsp->_len.hkey < TOEPLITZ_KEY_MIN_LEN ||
+	    rsp->_len.hkey > TOEPLITZ_KEY_MAX_LEN)
+		error(1, 0, "RSS key length %u out of bounds [%u, %u]",
+		      rsp->_len.hkey, TOEPLITZ_KEY_MIN_LEN,
+		      TOEPLITZ_KEY_MAX_LEN);
+
+	memcpy(toeplitz_key, rsp->hkey, rsp->_len.hkey);
+
+	if (rsp->_count.indir > RSS_MAX_INDIR)
+		error(1, 0, "RSS indirection table too large (%u > %u)",
+		      rsp->_count.indir, RSS_MAX_INDIR);
+
+	/* If indir table is not available we'll fall back to simple modulo math */
+	if (rsp->_count.indir) {
+		memcpy(rss_indir_tbl, rsp->indir,
+		       rsp->_count.indir * sizeof(rss_indir_tbl[0]));
+		rss_indir_tbl_size = rsp->_count.indir;
+
+		log_verbose("RSS indirection table size: %u\n",
+			    rss_indir_tbl_size);
+	}
+
+	ethtool_rss_get_rsp_free(rsp);
+	ethtool_rss_get_req_free(req);
+	ynl_sock_destroy(ys);
+}
+
 static void parse_opts(int argc, char **argv)
 {
 	static struct option long_options[] = {
@@ -550,7 +612,7 @@ static void parse_opts(int argc, char **argv)
 	}
 
 	if (!have_toeplitz)
-		error(1, 0, "Must supply rss key ('-k')");
+		read_rss_dev_info_ynl();
 
 	num_cpus = get_nprocs();
 	if (num_cpus > RSS_MAX_CPUS)
@@ -576,6 +638,10 @@ int main(int argc, char **argv)
 		fd_sink = setup_sink();
 
 	setup_rings();
+
+	/* Signal to test framework that we're ready to receive */
+	ksft_ready();
+
 	process_rings();
 	cleanup_rings();
 
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.py b/tools/testing/selftests/drivers/net/hw/toeplitz.py
new file mode 100755
index 000000000000..d2db5ee9e358
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Toeplitz Rx hashing test:
+ - rxhash (the hash value calculation itself);
+ - RSS mapping from rxhash to rx queue;
+ - RPS mapping from rxhash to cpu.
+"""
+
+import glob
+import os
+import socket
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import NetDrvEpEnv, EthtoolFamily, NetdevFamily
+from lib.py import cmd, bkg, rand_port, defer
+from lib.py import ksft_in
+from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx, KsftFailEx
+
+# "define" for the ID of the Toeplitz hash function
+ETH_RSS_HASH_TOP = 1
+
+
+def _check_rps_and_rfs_not_configured(cfg):
+    """Skip the test (KsftSkipEx) if RPS or RFS is already configured."""
+
+    for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+        with open(rps_file, "r", encoding="utf-8") as fp:
+            val = fp.read().strip()
+            if set(val) - {"0", ","}:  # any character other than "0" or "," means a non-zero mask
+                raise KsftSkipEx(f"RPS already configured on {rps_file}: {val}")
+
+    rfs_file = "/proc/sys/net/core/rps_sock_flow_entries"
+    with open(rfs_file, "r", encoding="utf-8") as fp:
+        val = fp.read().strip()
+        if val != "0":
+            raise KsftSkipEx(f"RFS already configured {rfs_file}: {val}")
+
+
+def _get_cpu_for_irq(irq):
+    with open(f"/proc/irq/{irq}/smp_affinity_list", "r",
+              encoding="utf-8") as fp:
+        data = fp.read().strip()
+        if "," in data or "-" in data:
+            raise KsftFailEx(f"IRQ{irq} is not mapped to a single core: {data}")
+        return int(data)
+
+
+def _get_irq_cpus(cfg):
+    """
+    Read the list of IRQs for the device Rx queues.
+    """
+    queues = cfg.netnl.queue_get({"ifindex": cfg.ifindex}, dump=True)
+    napis = cfg.netnl.napi_get({"ifindex": cfg.ifindex}, dump=True)
+
+    # Remap into ID-based dicts
+    napis = {n["id"]: n for n in napis}
+    queues = {f"{q['type']}{q['id']}": q for q in queues}
+
+    cpus = []
+    for rx in range(9999):
+        name = f"rx{rx}"
+        if name not in queues:
+            break
+        cpus.append(_get_cpu_for_irq(napis[queues[name]["napi-id"]]["irq"]))
+
+    return cpus
+
+
+def _get_unused_cpus(cfg, count=2):
+    """
+    Get CPUs that are not used by Rx queues.
+    Returns exactly 'count' CPU numbers; raises KsftSkipEx if fewer are unused.
+    """
+
+    # Get CPUs used by Rx queues
+    rx_cpus = set(_get_irq_cpus(cfg))
+
+    # Get total number of CPUs
+    num_cpus = os.cpu_count()
+
+    # Find unused CPUs
+    unused_cpus = [cpu for cpu in range(num_cpus) if cpu not in rx_cpus]
+
+    if len(unused_cpus) < count:
+        raise KsftSkipEx(f"Need at least {count} CPUs not used by Rx queues, found {len(unused_cpus)}")
+
+    return unused_cpus[:count]
+
+
+def _configure_rps(cfg, rps_cpus):
+    """Configure RPS for all Rx queues."""
+
+    mask = 0
+    for cpu in rps_cpus:
+        mask |= (1 << cpu)
+    mask = hex(mask)[2:]
+
+    # Set RPS bitmap for all rx queues
+    for rps_file in glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*/rps_cpus"):
+        with open(rps_file, "w", encoding="utf-8") as fp:
+            fp.write(mask)
+
+    return mask
+
+
+def _send_traffic(cfg, proto_flag, ipver, port):
+    """Send 20 packets of requested type."""
+
+    # Determine protocol and IP version for socat
+    if proto_flag == "-u":
+        proto = "UDP"
+    else:
+        proto = "TCP"
+
+    baddr = f"[{cfg.addr_v['6']}]" if ipver == "6" else cfg.addr_v["4"]
+
+    # Run socat in a loop to send traffic periodically
+    # Use sh -c with a loop similar to toeplitz_client.sh
+    socat_cmd = f"""
+    for i in `seq 20`; do
+        echo "msg $i" | socat -{ipver} -t 0.1 - {proto}:{baddr}:{port};
+        sleep 0.001;
+    done
+    """
+
+    cmd(socat_cmd, shell=True, host=cfg.remote)
+
+
+def _test_variants():
+    for grp in ["", "rss", "rps"]:
+        for l4 in ["tcp", "udp"]:
+            for l3 in ["4", "6"]:
+                name = f"{l4}_ipv{l3}"
+                if grp:
+                    name = f"{grp}_{name}"
+                yield KsftNamedVariant(name, "-" + l4[0], l3, grp)
+
+
+@ksft_variants(_test_variants())
+def test(cfg, proto_flag, ipver, grp):
+    """Run a single toeplitz test."""
+
+    cfg.require_ipver(ipver)
+
+    # Check that rxhash is enabled
+    ksft_in("receive-hashing: on", cmd(f"ethtool -k {cfg.ifname}").stdout)
+
+    rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}})
+    # Make sure NIC is configured to use Toeplitz hash, and no key xfrm.
+    if rss.get('hfunc') != ETH_RSS_HASH_TOP or rss.get('input-xfrm'):
+        cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+                           "hfunc": ETH_RSS_HASH_TOP,
+                           "input-xfrm": {}})
+        defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},
+                                  "hfunc": rss.get('hfunc'),
+                                  "input-xfrm": rss.get('input-xfrm', {})
+                                  })
+
+    port = rand_port(socket.SOCK_DGRAM)
+
+    toeplitz_path = cfg.test_dir / "toeplitz"
+    rx_cmd = [
+        str(toeplitz_path),
+        "-" + ipver,
+        proto_flag,
+        "-d", str(port),
+        "-i", cfg.ifname,
+        "-T", "4000",
+        "-s",
+        "-v"
+    ]
+
+    if grp:
+        _check_rps_and_rfs_not_configured(cfg)
+    if grp == "rss":
+        irq_cpus = ",".join([str(x) for x in _get_irq_cpus(cfg)])
+        rx_cmd += ["-C", irq_cpus]
+        ksft_pr(f"RSS using CPUs: {irq_cpus}")
+    elif grp == "rps":
+        # Get CPUs not used by Rx queues and configure them for RPS
+        rps_cpus = _get_unused_cpus(cfg, count=2)
+        rps_mask = _configure_rps(cfg, rps_cpus)
+        defer(_configure_rps, cfg, [])
+        rx_cmd += ["-r", rps_mask]
+        ksft_pr(f"RPS using CPUs: {rps_cpus}, mask: {rps_mask}")
+
+    # Run rx in background, it will exit once it has seen enough packets
+    with bkg(" ".join(rx_cmd), ksft_ready=True, exit_wait=True) as rx_proc:
+        while rx_proc.proc.poll() is None:
+            _send_traffic(cfg, proto_flag, ipver, port)
+
+    # Check rx result
+    ksft_pr("Receiver output:")
+    ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
+    if rx_proc.stderr:
+        ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+
+
+def main() -> None:
+    """Ksft boilerplate main."""
+
+    with NetDrvEpEnv(__file__) as cfg:
+        cfg.ethnl = EthtoolFamily()
+        cfg.netnl = NetdevFamily()
+        ksft_run(cases=[test], args=(cfg,))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
index e6c070f32f51..8b75faa9af6d 100644
--- a/tools/testing/selftests/drivers/net/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -22,10 +22,10 @@ try:
         NlError, RtnlFamily, DevlinkFamily, PSPFamily
     from net.lib.py import CmdExitFailure
     from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \
-        fd_read_timeout, ip, rand_port, tool, wait_port_listen, wait_file
+        fd_read_timeout, ip, rand_port, wait_port_listen, wait_file
     from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx
     from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \
-        ksft_setup
+        ksft_setup, ksft_variants, KsftNamedVariant
     from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \
         ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none
 
@@ -34,20 +34,21 @@ try:
                "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily",
                "CmdExitFailure",
                "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool",
-               "fd_read_timeout", "ip", "rand_port", "tool",
+               "fd_read_timeout", "ip", "rand_port",
                "wait_port_listen", "wait_file",
                "KsftSkipEx", "KsftFailEx", "KsftXfailEx",
                "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run",
-               "ksft_setup",
+               "ksft_setup", "ksft_variants", "KsftNamedVariant",
                "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt",
                "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt",
                "ksft_not_none", "ksft_not_none"]
 
     from .env import NetDrvEnv, NetDrvEpEnv
-    from .load import GenerateTraffic
+    from .load import GenerateTraffic, Iperf3Runner
     from .remote import Remote
 
-    __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote"]
+    __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote",
+                "Iperf3Runner"]
 except ModuleNotFoundError as e:
     print("Failed importing `net` library from kernel sources")
     print(str(e))
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 01be3d9b9720..8b644fd84ff2 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -168,6 +168,8 @@ class NetDrvEpEnv(NetDrvEnvBase):
 
         # resolve remote interface name
         self.remote_ifname = self.resolve_remote_ifc()
+        self.remote_dev = ip("-d link show dev " + self.remote_ifname,
+                             host=self.remote, json=True)[0]
 
         self._required_cmd = {}
 
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index c4e808407cc4..f181fa2d38fc 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -2,21 +2,89 @@
 
 import re
 import time
+import json
 
 from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
 
-class GenerateTraffic:
-    def __init__(self, env, port=None):
-        env.require_cmd("iperf3", local=True, remote=True)
 
+class Iperf3Runner:
+    """
+    Sets up and runs iperf3 traffic.
+    """
+    def __init__(self, env, port=None, server_ip=None, client_ip=None):
+        env.require_cmd("iperf3", local=True, remote=True)
         self.env = env
-
         self.port = rand_port() if port is None else port
-        self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True)
+        self.server_ip = server_ip
+        self.client_ip = client_ip
+
+    def _build_server(self):
+        cmdline = f"iperf3 -s -1 -p {self.port}"
+        if self.server_ip:
+            cmdline += f" -B {self.server_ip}"
+        return cmdline
+
+    def _build_client(self, streams, duration, reverse):
+        host = self.env.addr if self.server_ip is None else self.server_ip
+        cmdline = f"iperf3 -c {host} -p {self.port} -P {streams} -t {duration} -J"
+        if self.client_ip:
+            cmdline += f" -B {self.client_ip}"
+        if reverse:
+            cmdline += " --reverse"
+        return cmdline
+
+    def start_server(self):
+        """
+        Starts an iperf3 server with optional bind IP.
+        """
+        cmdline = self._build_server()
+        proc = cmd(cmdline, background=True)
         wait_port_listen(self.port)
         time.sleep(0.1)
-        self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400",
-                                 background=True, host=env.remote)
+        return proc
+
+    def start_client(self, background=False, streams=1, duration=10, reverse=False):
+        """
+        Starts the iperf3 client with the configured options.
+        """
+        cmdline = self._build_client(streams, duration, reverse)
+        return cmd(cmdline, background=background, host=self.env.remote)
+
+    def measure_bandwidth(self, reverse=False):
+        """
+        Runs an iperf3 measurement and returns the average bandwidth (Gbps).
+        Discards the first and last few reporting intervals and uses only the
+        middle part of the run where throughput is typically stable.
+        """
+        self.start_server()
+        result = self.start_client(duration=10, reverse=reverse)
+
+        if result.ret != 0:
+            raise RuntimeError("iperf3 failed to run successfully")
+        try:
+            out = json.loads(result.stdout)
+        except json.JSONDecodeError as exc:
+            raise ValueError("Failed to parse iperf3 JSON output") from exc
+
+        intervals = out.get("intervals", [])
+        samples = [i["sum"]["bits_per_second"] / 1e9 for i in intervals]
+        if len(samples) < 10:
+            raise ValueError(f"iperf3 returned too few intervals: {len(samples)}")
+        # Discard potentially unstable first and last 3 seconds.
+        stable = samples[3:-3]
+
+        avg = sum(stable) / len(stable)
+
+        return avg
+
+
+class GenerateTraffic:
+    def __init__(self, env, port=None):
+        self.env = env
+        self.runner = Iperf3Runner(env, port)
+
+        self._iperf_server = self.runner.start_server()
+        self._iperf_client = self.runner.start_client(background=True, streams=16, duration=86400)
 
         # Wait for traffic to ramp up
         if not self._wait_pkts(pps=1000):
@@ -61,7 +129,7 @@ class GenerateTraffic:
     def _wait_client_stopped(self, sleep=0.005, timeout=5):
         end = time.monotonic() + timeout
 
-        live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ")
+        live_port_pattern = re.compile(fr":{self.runner.port:04X} 0[^6] ")
 
         while time.monotonic() < end:
             data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 8e1085e89647..ae8abff4be40 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -11,9 +11,11 @@ set -euo pipefail
 LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
 
 SRCIF="" # to be populated later
+SRCIP="" # to be populated later
 SRCIP4="192.0.2.1"
 SRCIP6="fc00::1"
 DSTIF="" # to be populated later
+DSTIP="" # to be populated later
 DSTIP4="192.0.2.2"
 DSTIP6="fc00::2"
 
@@ -28,17 +30,23 @@ NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
 # NAMESPACE will be populated by setup_ns with a random value
 NAMESPACE=""
 
-# IDs for netdevsim
+# IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test
+# or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the
+# same time.
 NSIM_DEV_1_ID=$((256 + RANDOM % 256))
 NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_BOND_TX_1=$((768 + RANDOM % 256))
+NSIM_BOND_TX_2=$((1024 + RANDOM % 256))
+NSIM_BOND_RX_1=$((1280 + RANDOM % 256))
+NSIM_BOND_RX_2=$((1536 + RANDOM % 256))
 NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
+NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
 
 # Used to create and delete namespaces
 source "${LIBDIR}"/../../../../net/lib.sh
 
 # Create netdevsim interfaces
 create_ifaces() {
-
 	echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
 	echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
 	udevadm settle 2> /dev/null || true
@@ -113,31 +121,38 @@ function set_network() {
 	configure_ip
 }
 
-function create_dynamic_target() {
-	local FORMAT=${1:-"extended"}
+function _create_dynamic_target() {
+	local FORMAT="${1:?FORMAT parameter required}"
+	local NCPATH="${2:?NCPATH parameter required}"
 
 	DSTMAC=$(ip netns exec "${NAMESPACE}" \
 		 ip link show "${DSTIF}" | awk '/ether/ {print $2}')
 
 	# Create a dynamic target
-	mkdir "${NETCONS_PATH}"
+	mkdir "${NCPATH}"
 
-	echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
-	echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
-	echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
-	echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+	echo "${DSTIP}" > "${NCPATH}"/remote_ip
+	echo "${SRCIP}" > "${NCPATH}"/local_ip
+	echo "${DSTMAC}" > "${NCPATH}"/remote_mac
+	echo "${SRCIF}" > "${NCPATH}"/dev_name
 
 	if [ "${FORMAT}" == "basic" ]
 	then
 		# Basic target does not support release
-		echo 0 > "${NETCONS_PATH}"/release
-		echo 0 > "${NETCONS_PATH}"/extended
+		echo 0 > "${NCPATH}"/release
+		echo 0 > "${NCPATH}"/extended
 	elif [ "${FORMAT}" == "extended" ]
 	then
-		echo 1 > "${NETCONS_PATH}"/extended
+		echo 1 > "${NCPATH}"/extended
 	fi
+}
 
-	echo 1 > "${NETCONS_PATH}"/enabled
+function create_dynamic_target() {
+	local FORMAT=${1:-"extended"}
+	local NCPATH=${2:-"$NETCONS_PATH"}
+	_create_dynamic_target "${FORMAT}" "${NCPATH}"
+
+	echo 1 > "${NCPATH}"/enabled
 
 	# This will make sure that the kernel was able to
 	# load the netconsole driver configuration. The console message
@@ -185,14 +200,26 @@ function do_cleanup() {
 	echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
 }
 
-function cleanup() {
+function cleanup_netcons() {
 	# delete netconsole dynamic reconfiguration
-	echo 0 > "${NETCONS_PATH}"/enabled
+	# do not fail if the target is already disabled
+	if [[ ! -d "${NETCONS_PATH}" ]]
+	then
+		# in some cases this is called before netcons path is created
+		return
+	fi
+	if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]]
+	then
+		echo 0 > "${NETCONS_PATH}"/enabled || true
+	fi
 	# Remove all the keys that got created during the selftest
 	find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
 	# Remove the configfs entry
 	rmdir "${NETCONS_PATH}"
+}
 
+function cleanup() {
+	cleanup_netcons
 	do_cleanup
 }
 
@@ -222,7 +249,7 @@ function listen_port_and_save_to() {
 
 	# Just wait for 2 seconds
 	timeout 2 ip netns exec "${NAMESPACE}" \
-		socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}"
+		socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" 2> /dev/null
 }
 
 # Only validate that the message arrived properly
@@ -369,3 +396,24 @@ function wait_for_port() {
 	# more frequently on IPv6
 	sleep 1
 }
+
+# Clean up netdevsim ifaces created for bonding test
+function cleanup_bond_nsim() {
+	ip -n "${TXNS}" \
+		link delete "${BOND_TX_MAIN_IF}" type bond || true
+	ip -n "${RXNS}" \
+		link delete "${BOND_RX_MAIN_IF}" type bond || true
+
+	cleanup_netdevsim "$NSIM_BOND_TX_1"
+	cleanup_netdevsim "$NSIM_BOND_TX_2"
+	cleanup_netdevsim "$NSIM_BOND_RX_1"
+	cleanup_netdevsim "$NSIM_BOND_RX_2"
+}
+
+# cleanup tests that use bonding interfaces
+function cleanup_bond() {
+	cleanup_netcons
+	cleanup_bond_nsim
+	cleanup_all_ns
+	ip link delete "${VETH0}" || true
+}
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
index a3446b569976..2022f3061738 100755
--- a/tools/testing/selftests/drivers/net/netcons_basic.sh
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -28,8 +28,6 @@ OUTPUT_FILE="/tmp/${TARGET}"
 
 # Check for basic system dependency and exit if not found
 check_for_dependencies
-# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
-echo "6 5" > /proc/sys/kernel/printk
 # Remove the namespace, interfaces and netconsole target on exit
 trap cleanup EXIT
 
@@ -39,6 +37,9 @@ do
 	for IP_VERSION in "ipv6" "ipv4"
 	do
 		echo "Running with target mode: ${FORMAT} (${IP_VERSION})"
+		# Set current loglevel to KERN_INFO(6), and default to
+		# KERN_NOTICE(5)
+		echo "6 5" > /proc/sys/kernel/printk
 		# Create one namespace and two interfaces
 		set_network "${IP_VERSION}"
 		# Create a dynamic target for netconsole
diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh
index 29bad56448a2..06089643b771 100755
--- a/tools/testing/selftests/drivers/net/netcons_overflow.sh
+++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh
@@ -15,7 +15,7 @@ SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
 
 source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
 # This is coming from netconsole code. Check for it in drivers/net/netconsole.c
-MAX_USERDATA_ITEMS=16
+MAX_USERDATA_ITEMS=256
 
 # Function to create userdata entries
 function create_userdata_max_entries() {
diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netcons_torture.sh
new file mode 100755
index 000000000000..2ce9ee3719d1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_torture.sh
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Repeatedly sends kernel messages, toggles netconsole targets on and off,
+# creates and deletes targets in parallel, and toggles the source interface to
+# simulate stress conditions.
+#
+# This test aims to verify the robustness of netconsole under dynamic
+# configurations and concurrent operations.
+#
+# The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make
+# sure no issues are reported.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+
+# Number of times the main loop runs
+ITERATIONS=${1:-150}
+
+# Only test extended format
+FORMAT="extended"
+# And ipv6 only
+IP_VERSION="ipv6"
+
+# Create, enable and delete some targets.
+create_and_delete_random_target() {
+	COUNT=2
+	RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_)
+
+	if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}"  ] || \
+	   [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then
+		echo "Function didn't finish yet, skipping it." >&2
+		return
+	fi
+
+	# enable COUNT targets
+	for i in $(seq ${COUNT})
+	do
+		RND_TARGET="${RND_PREFIX}"${i}
+		RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}"
+
+		# Basic population so the target can come up
+		_create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}"
+	done
+
+	echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg
+	# disable them all
+	for i in $(seq ${COUNT})
+	do
+		RND_TARGET="${RND_PREFIX}"${i}
+		RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}"
+		if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]]
+		then
+			echo 0 > "${RND_TARGET_PATH}"/enabled
+		fi
+		rmdir "${RND_TARGET_PATH}"
+	done
+}
+
+# Disable and enable the target mid-air, while messages
+# are being transmitted.
+toggle_netcons_target() {
+	for i in $(seq 2)
+	do
+		if [ ! -d "${NETCONS_PATH}" ]
+		then
+			break
+		fi
+		echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true
+		# Try to enable a bit harder, given it might fail to enable
+		# Write to `enabled` might fail depending on the lock, which is
+		# highly contentious here
+		for _ in $(seq 5)
+		do
+			echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true
+		done
+	done
+}
+
+toggle_iface(){
+	ip link set "${SRCIF}" down
+	ip link set "${SRCIF}" up
+}
+
+# Start here
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network "${IP_VERSION}"
+# Create a dynamic target for netconsole
+create_dynamic_target "${FORMAT}"
+
+for i in $(seq "$ITERATIONS")
+do
+	for _ in $(seq 10)
+	do
+		echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg
+	done
+	wait
+
+	if (( i % 30 == 0 )); then
+		toggle_netcons_target &
+	fi
+
+	if (( i % 50 == 0 )); then
+		# create some targets, enable them, send msg and disable
+		# all in a parallel thread
+		create_and_delete_random_target &
+	fi
+
+	if (( i % 70 == 0 )); then
+		toggle_iface &
+	fi
+done
+wait
+
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
index daf51113c827..1a228c5430f5 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/Makefile
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -8,7 +8,6 @@ TEST_PROGS := \
 	ethtool-features.sh \
 	ethtool-fec.sh \
 	ethtool-pause.sh \
-	ethtool-ring.sh \
 	fib.sh \
 	fib_notifications.sh \
 	hw_stats_l3.sh \
@@ -20,4 +19,8 @@ TEST_PROGS := \
 	udp_tunnel_nic.sh \
 # end of TEST_PROGS
 
+TEST_FILES := \
+	ethtool-common.sh
+# end of TEST_FILES
+
 include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 030762b203d7..1b529ccaf050 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -3,7 +3,8 @@
 
 lib_dir=$(dirname $0)/../../../net/forwarding
 
-ALL_TESTS="fw_flash_test params_test regions_test reload_test \
+ALL_TESTS="fw_flash_test params_test  \
+	   params_default_test regions_test reload_test \
 	   netns_reload_test resource_test dev_info_test \
 	   empty_reporter_test dummy_reporter_test rate_test"
 NUM_NETIFS=0
@@ -78,17 +79,28 @@ fw_flash_test()
 param_get()
 {
 	local name=$1
+	local attr=${2:-value}
+	local cmode=${3:-driverinit}
 
 	cmd_jq "devlink dev param show $DL_HANDLE name $name -j" \
-	       '.[][][].values[] | select(.cmode == "driverinit").value'
+	       '.[][][].values[] | select(.cmode == "'"$cmode"'").'"$attr"
 }
 
 param_set()
 {
 	local name=$1
 	local value=$2
+	local cmode=${3:-driverinit}
 
-	devlink dev param set $DL_HANDLE name $name cmode driverinit value $value
+	devlink dev param set $DL_HANDLE name $name cmode $cmode value $value
+}
+
+param_set_default()
+{
+	local name=$1
+	local cmode=${2:-driverinit}
+
+	devlink dev param set $DL_HANDLE name $name default cmode $cmode
 }
 
 check_value()
@@ -97,12 +109,18 @@ check_value()
 	local phase_name=$2
 	local expected_param_value=$3
 	local expected_debugfs_value=$4
+	local cmode=${5:-driverinit}
 	local value
+	local attr="value"
 
-	value=$(param_get $name)
-	check_err $? "Failed to get $name param value"
+	if [[ "$phase_name" == *"default"* ]]; then
+		attr="default"
+	fi
+
+	value=$(param_get $name $attr $cmode)
+	check_err $? "Failed to get $name param $attr"
 	[ "$value" == "$expected_param_value" ]
-	check_err $? "Unexpected $phase_name $name param value"
+	check_err $? "Unexpected $phase_name $name param $attr"
 	value=$(<$DEBUGFS_DIR/$name)
 	check_err $? "Failed to get $name debugfs value"
 	[ "$value" == "$expected_debugfs_value" ]
@@ -135,6 +153,92 @@ params_test()
 	log_test "params test"
 }
 
+value_to_debugfs()
+{
+	local value=$1
+
+	case "$value" in
+		true)
+			echo "Y"
+			;;
+		false)
+			echo "N"
+			;;
+		*)
+			echo "$value"
+			;;
+	esac
+}
+
+test_default()
+{
+	local param_name=$1
+	local new_value=$2
+	local expected_default=$3
+	local cmode=${4:-driverinit}
+	local default_debugfs
+	local new_debugfs
+	local expected_debugfs
+
+	default_debugfs=$(value_to_debugfs $expected_default)
+	new_debugfs=$(value_to_debugfs $new_value)
+
+	expected_debugfs=$default_debugfs
+	check_value $param_name initial-default $expected_default $expected_debugfs $cmode
+
+	param_set $param_name $new_value $cmode
+	check_err $? "Failed to set $param_name to $new_value"
+
+	expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$new_debugfs" || echo "$default_debugfs")
+	check_value $param_name post-set $new_value $expected_debugfs $cmode
+
+	devlink dev reload $DL_HANDLE
+	check_err $? "Failed to reload device"
+
+	expected_debugfs=$new_debugfs
+	check_value $param_name post-reload-new-value $new_value $expected_debugfs $cmode
+
+	param_set_default $param_name $cmode
+	check_err $? "Failed to set $param_name to default"
+
+	expected_debugfs=$([ "$cmode" == "runtime" ] && echo "$default_debugfs" || echo "$new_debugfs")
+	check_value $param_name post-set-default $expected_default $expected_debugfs $cmode
+
+	devlink dev reload $DL_HANDLE
+	check_err $? "Failed to reload device"
+
+	expected_debugfs=$default_debugfs
+	check_value $param_name post-reload-default $expected_default $expected_debugfs $cmode
+}
+
+params_default_test()
+{
+	RET=0
+
+	if ! devlink dev param help 2>&1 | grep -q "value VALUE | default"; then
+		echo "SKIP: devlink cli missing default feature"
+		return
+	fi
+
+	# Remove side effects of previous tests. Use plain param_set, because
+	# param_set_default is a feature under test here.
+	param_set max_macs 32 driverinit
+	check_err $? "Failed to reset max_macs to default value"
+	param_set test1 true driverinit
+	check_err $? "Failed to reset test1 to default value"
+	param_set test2 1234 runtime
+	check_err $? "Failed to reset test2 to default value"
+
+	devlink dev reload $DL_HANDLE
+	check_err $? "Failed to reload device for clean state"
+
+	test_default max_macs 16 32 driverinit
+	test_default test1 false true driverinit
+	test_default test2 100 1234 runtime
+
+	log_test "params default test"
+}
+
 check_region_size()
 {
 	local name=$1
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
deleted file mode 100755
index c969559ffa7a..000000000000
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0-only
-
-source ethtool-common.sh
-
-function get_value {
-    local query="${SETTINGS_MAP[$1]}"
-
-    echo $(ethtool -g $NSIM_NETDEV | \
-        tail -n +$CURR_SETT_LINE | \
-        awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}')
-}
-
-function update_current_settings {
-    for key in ${!SETTINGS_MAP[@]}; do
-        CURRENT_SETTINGS[$key]=$(get_value $key)
-    done
-    echo ${CURRENT_SETTINGS[@]}
-}
-
-if ! ethtool -h | grep -q set-ring >/dev/null; then
-    echo "SKIP: No --set-ring support in ethtool"
-    exit 4
-fi
-
-NSIM_NETDEV=$(make_netdev)
-
-set -o pipefail
-
-declare -A SETTINGS_MAP=(
-    ["rx"]="RX"
-    ["rx-mini"]="RX Mini"
-    ["rx-jumbo"]="RX Jumbo"
-    ["tx"]="TX"
-)
-
-declare -A EXPECTED_SETTINGS=(
-    ["rx"]=""
-    ["rx-mini"]=""
-    ["rx-jumbo"]=""
-    ["tx"]=""
-)
-
-declare -A CURRENT_SETTINGS=(
-    ["rx"]=""
-    ["rx-mini"]=""
-    ["rx-jumbo"]=""
-    ["tx"]=""
-)
-
-MAX_VALUE=$((RANDOM % $((2**32-1))))
-RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/)
-
-for ring_max_entry in $RING_MAX_LIST; do
-    echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry
-done
-
-CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:)
-
-# populate the expected settings map
-for key in ${!SETTINGS_MAP[@]}; do
-    EXPECTED_SETTINGS[$key]=$(get_value $key)
-done
-
-# test
-for key in ${!SETTINGS_MAP[@]}; do
-    value=$((RANDOM % $MAX_VALUE))
-
-    ethtool -G $NSIM_NETDEV "$key" "$value"
-
-    EXPECTED_SETTINGS[$key]="$value"
-    expected=${EXPECTED_SETTINGS[@]}
-    current=$(update_current_settings)
-
-    check $? "$current" "$expected"
-    set +x
-done
-
-if [ $num_errors -eq 0 ]; then
-    echo "PASSED all $((num_passes)) checks"
-    exit 0
-else
-    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
-    exit 1
-fi
diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py
index 4ae7a785ff10..06559ef49b9a 100755
--- a/tools/testing/selftests/drivers/net/psp.py
+++ b/tools/testing/selftests/drivers/net/psp.py
@@ -109,6 +109,10 @@ def _check_data_outq(s, exp_len, force_wait=False):
         time.sleep(0.01)
     ksft_eq(outq, exp_len)
 
+
+def _get_stat(cfg, key):
+    return cfg.pspnl.get_stats({'dev-id': cfg.psp_dev_id})[key]
+
 #
 # Test case boiler plate
 #
@@ -171,11 +175,16 @@ def dev_rotate(cfg):
     """ Test key rotation """
     _init_psp_dev(cfg)
 
+    prev_rotations = _get_stat(cfg, 'key-rotations')
+
     rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
     ksft_eq(rot['id'], cfg.psp_dev_id)
     rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
     ksft_eq(rot['id'], cfg.psp_dev_id)
 
+    cur_rotations = _get_stat(cfg, 'key-rotations')
+    ksft_eq(cur_rotations, prev_rotations + 2)
+
 
 def dev_rotate_spi(cfg):
     """ Test key rotation and SPI check """
@@ -475,6 +484,7 @@ def data_stale_key(cfg):
     """ Test send on a double-rotated key """
     _init_psp_dev(cfg)
 
+    prev_stale = _get_stat(cfg, 'stale-events')
     s = _make_psp_conn(cfg)
     try:
         rx_assoc = cfg.pspnl.rx_assoc({"version": 0,
@@ -495,6 +505,9 @@ def data_stale_key(cfg):
         cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
         cfg.pspnl.key_rotate({"id": cfg.psp_dev_id})
 
+        cur_stale = _get_stat(cfg, 'stale-events')
+        ksft_gt(cur_stale, prev_stale)
+
         s.send(b'0123456789' * 200)
         _check_data_outq(s, 2000, force_wait=True)
     finally:
diff --git a/tools/testing/selftests/drivers/net/ring_reconfig.py b/tools/testing/selftests/drivers/net/ring_reconfig.py
new file mode 100755
index 000000000000..f9530a8b0856
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ring_reconfig.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Test channel and ring size configuration via ethtool (-L / -G).
+"""
+
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_eq
+from lib.py import NetDrvEpEnv, EthtoolFamily, GenerateTraffic
+from lib.py import defer, NlError
+
+
+def channels(cfg) -> None:
+    """
+    Twiddle channel counts in various combinations of parameters.
+    We only check that the driver adheres to the requested config when
+    the config is accepted, and that nothing crashes along the way.
+    """
+    ehdr = {'header':{'dev-index': cfg.ifindex}}
+    chans = cfg.eth.channels_get(ehdr)
+
+    all_keys = ["rx", "tx", "combined"]
+    mixes = [{"combined"}, {"rx", "tx"}, {"rx", "combined"}, {"tx", "combined"},
+             {"rx", "tx", "combined"},]
+
+    # Get the set of keys that device actually supports
+    restore = {}
+    supported = set()
+    for key in all_keys:
+        if key + "-max" in chans:
+            supported.add(key)
+            restore |= {key + "-count": chans[key + "-count"]}
+
+    defer(cfg.eth.channels_set, ehdr | restore)
+
+    def test_config(config):
+        try:
+            cfg.eth.channels_set(ehdr | config)
+            get = cfg.eth.channels_get(ehdr)
+            for k, v in config.items():
+                ksft_eq(get.get(k, 0), v)
+        except NlError as e:
+            failed.append(mix)
+            ksft_pr("Can't set", config, e)
+        else:
+            ksft_pr("Okay", config)
+
+    failed = []
+    for mix in mixes:
+        if not mix.issubset(supported):
+            continue
+
+        # Set all the values in the mix to 1, other supported to 0
+        config = {}
+        for key in all_keys:
+            config[key + "-count"] = 1 if key in mix else 0
+        test_config(config)
+
+    for mix in mixes:
+        if not mix.issubset(supported):
+            continue
+        if mix in failed:
+            continue
+
+        # Set all the values in the mix to max, other supported to 0
+        config = {}
+        for key in all_keys:
+            config[key + "-count"] = chans[key + '-max'] if key in mix else 0
+        test_config(config)
+
+
+def _configure_min_ring_cnt(cfg) -> None:
+    """ Try to configure a single Rx/Tx ring. """
+    ehdr = {'header':{'dev-index': cfg.ifindex}}
+    chans = cfg.eth.channels_get(ehdr)
+
+    all_keys = ["rx-count", "tx-count", "combined-count"]
+    restore = {}
+    config = {}
+    for key in all_keys:
+        if key in chans:
+            restore[key] = chans[key]
+            config[key] = 0
+
+    if chans.get('combined-count', 0) > 1:
+        config['combined-count'] = 1
+    elif chans.get('rx-count', 0) > 1 and chans.get('tx-count', 0) > 1:
+        config['tx-count'] = 1
+        config['rx-count'] = 1
+    else:
+        # looks like we're already on 1 channel
+        return
+
+    cfg.eth.channels_set(ehdr | config)
+    defer(cfg.eth.channels_set, ehdr | restore)
+
+
+def ringparam(cfg) -> None:
+    """
+    Shrink every ring parameter of cfg's device as far as the driver allows,
+    run traffic over the minimal rings, then try the maximum sizes as well.
+    """
+    ehdr = {'header':{'dev-index': cfg.ifindex}}
+    rings = cfg.eth.rings_get(ehdr)
+
+    restore = {}
+    maxes = {}
+    params = set()
+    for key in rings.keys():
+        if key.endswith('-max'):  # only "<param>-max" attrs name a tunable
+            param = key[:-4]  # strip the "-max" suffix
+            maxes[param] = rings[key]
+            params.add(param)
+            restore[param] = rings[param]
+
+    defer(cfg.eth.rings_set, ehdr | restore)
+
+    # Speed up the reconfig by configuring just one ring
+    _configure_min_ring_cnt(cfg)
+
+    # Try to reach min on all settings
+    for param in params:
+        val = rings[param]
+        while True:  # keep halving until the request is rejected or val is 0
+            try:
+                cfg.eth.rings_set({'header':{'dev-index': cfg.ifindex},
+                                   param: val // 2})
+                if val == 0:
+                    break
+                val //= 2  # accepted, remember the new size
+            except NlError:
+                break
+
+        get = cfg.eth.rings_get(ehdr)
+        ksft_eq(get[param], val)
+
+        ksft_pr(f"Reached min for '{param}' at {val} (max {maxes[param]})")
+
+    GenerateTraffic(cfg).wait_pkts_and_stop(10000)
+
+    # Try max across all params, if the driver supports large rings
+    # this may OOM so we ignore errors
+    try:
+        ksft_pr("Applying max settings")
+        config = {p: maxes[p] for p in params}
+        cfg.eth.rings_set(ehdr | config)
+    except NlError as e:
+        ksft_pr("Can't set max params", config, e)
+    else:
+        GenerateTraffic(cfg).wait_pkts_and_stop(10000)
+
+
+def main() -> None:
+    """ Ksft boiler plate main """
+
+    with NetDrvEpEnv(__file__) as cfg:
+        cfg.eth = EthtoolFamily()
+
+        ksft_run([channels,
+                  ringparam],
+                 args=(cfg, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index 04d0a2a13e73..b08e4d48b15c 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -263,14 +263,15 @@ def procfs_downup_hammer(cfg) -> None:
     Reading stats via procfs only holds the RCU lock, drivers often try
     to sleep when reading the stats, or don't protect against races.
     """
-    # Max out the queues, we'll flip between max and 1
+    # Set a large number of queues,
+    # we'll flip between min(max_queues, 64) and 1
     channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
     if channels['combined-count'] == 0:
         rx_type = 'rx'
     else:
         rx_type = 'combined'
     cur_queue_cnt = channels[f'{rx_type}-count']
-    max_queue_cnt = channels[f'{rx_type}-max']
+    max_queue_cnt = min(channels[f'{rx_type}-max'], 64)
 
     cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
     defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py
index a148004e1c36..e54df158dfe9 100755
--- a/tools/testing/selftests/drivers/net/xdp.py
+++ b/tools/testing/selftests/drivers/net/xdp.py
@@ -12,6 +12,7 @@ from dataclasses import dataclass
 from enum import Enum
 
 from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr
+from lib.py import KsftNamedVariant, ksft_variants
 from lib.py import KsftFailEx, NetDrvEpEnv
 from lib.py import EthtoolFamily, NetdevFamily, NlError
 from lib.py import bkg, cmd, rand_port, wait_port_listen
@@ -672,7 +673,18 @@ def test_xdp_native_adjst_head_shrnk_data(cfg):
     _validate_res(res, offset_lst, pkt_sz_lst)
 
 
-def _test_xdp_native_ifc_stats(cfg, act):
+@ksft_variants([
+    KsftNamedVariant("pass", XDPAction.PASS),
+    KsftNamedVariant("drop", XDPAction.DROP),
+    KsftNamedVariant("tx", XDPAction.TX),
+])
+def test_xdp_native_qstats(cfg, act):
+    """
+    Send 1000 messages. Expect XDP action specified in @act.
+    Make sure the packets were counted to interface level qstats
+    (Rx, and Tx if act is TX).
+    """
+
     cfg.require_cmd("socat")
 
     bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500)
@@ -687,9 +699,12 @@ def _test_xdp_native_ifc_stats(cfg, act):
         "/dev/null"
     # Listener runs on "remote" in case of XDP_TX
     rx_host = cfg.remote if act == XDPAction.TX else None
-    # We want to spew 2000 packets quickly, bash seems to do a good enough job
-    tx_udp =  f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \
-        "for i in `seq 2000`; do echo a >&5; done; exec 5>&-"
+    # We want to spew 1000 packets quickly, bash seems to do a good enough job
+    # Each reopening of the socket gives us a different local port (for RSS)
+    tx_udp = "for _ in `seq 20`; do " \
+        f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \
+        "for i in `seq 50`; do echo a >&5; done; " \
+        "exec 5>&-; done"
 
     cfg.wait_hw_stats_settle()
     # Qstats have more clearly defined semantics than rtnetlink.
@@ -704,11 +719,11 @@ def _test_xdp_native_ifc_stats(cfg, act):
     cfg.wait_hw_stats_settle()
     after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
 
-    ksft_ge(after['rx-packets'] - before['rx-packets'], 2000)
+    expected_pkts = 1000
+    ksft_ge(after['rx-packets'] - before['rx-packets'], expected_pkts)
     if act == XDPAction.TX:
-        ksft_ge(after['tx-packets'] - before['tx-packets'], 2000)
+        ksft_ge(after['tx-packets'] - before['tx-packets'], expected_pkts)
 
-    expected_pkts = 2000
     stats = _get_stats(prog_info["maps"]["map_xdp_stats"])
     ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch")
     if act == XDPAction.TX:
@@ -730,30 +745,6 @@ def _test_xdp_native_ifc_stats(cfg, act):
             ksft_ge(after['tx-packets'], before['tx-packets'])
 
 
-def test_xdp_native_qstats_pass(cfg):
-    """
-    Send 2000 messages, expect XDP_PASS, make sure the packets were counted
-    to interface level qstats (Rx).
-    """
-    _test_xdp_native_ifc_stats(cfg, XDPAction.PASS)
-
-
-def test_xdp_native_qstats_drop(cfg):
-    """
-    Send 2000 messages, expect XDP_DROP, make sure the packets were counted
-    to interface level qstats (Rx).
-    """
-    _test_xdp_native_ifc_stats(cfg, XDPAction.DROP)
-
-
-def test_xdp_native_qstats_tx(cfg):
-    """
-    Send 2000 messages, expect XDP_TX, make sure the packets were counted
-    to interface level qstats (Rx and Tx)
-    """
-    _test_xdp_native_ifc_stats(cfg, XDPAction.TX)
-
-
 def main():
     """
     Main function to execute the XDP tests.
@@ -778,9 +769,7 @@ def main():
                 test_xdp_native_adjst_tail_shrnk_data,
                 test_xdp_native_adjst_head_grow_data,
                 test_xdp_native_adjst_head_shrnk_data,
-                test_xdp_native_qstats_pass,
-                test_xdp_native_qstats_drop,
-                test_xdp_native_qstats_tx,
+                test_xdp_native_qstats,
             ],
             args=(cfg,))
     ksft_exit()
diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
index c43a69dffd83..a0c64f415a7f 100644
--- a/tools/testing/selftests/filesystems/utils.c
+++ b/tools/testing/selftests/filesystems/utils.c
@@ -487,7 +487,7 @@ int setup_userns(void)
 	uid_t uid = getuid();
 	gid_t gid = getgid();
 
-	ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
+	ret = unshare(CLONE_NEWNS|CLONE_NEWUSER);
 	if (ret) {
 		ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
 				   strerror(errno));
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
new file mode 100644
index 000000000000..7daf7292209e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc
@@ -0,0 +1,107 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Basic tests on writing to trace_marker_raw
+# requires: trace_marker_raw
+# flags: instance
+
+is_little_endian() {
+	if lscpu | grep -q 'Little Endian'; then
+		echo 1;
+	else
+		echo 0;
+	fi
+}
+
+little=`is_little_endian`
+
+make_str() {
+	id=$1
+	cnt=$2
+
+	if [ $little -eq 1 ]; then
+		val=`printf "\\%03o\\%03o\\%03o\\%03o" \
+			$(($id & 0xff)) \
+			$((($id >> 8) & 0xff)) \
+			$((($id >> 16) & 0xff)) \
+			$((($id >> 24) & 0xff))`
+	else
+		val=`printf "\\%03o\\%03o\\%03o\\%03o" \
+			$((($id >> 24) & 0xff)) \
+			$((($id >> 16) & 0xff)) \
+			$((($id >> 8) & 0xff)) \
+			$(($id & 0xff))`
+	fi
+
+	data=`printf -- 'X%.0s' $(seq $cnt)`
+
+	printf "${val}${data}"
+}
+
+write_buffer() {
+	id=$1
+	size=$2
+
+	# write the string into the raw marker
+	make_str $id $size > trace_marker_raw
+}
+
+
+test_multiple_writes() {
+
+	# Write a bunch of data where the id is the count of
+	# data to write
+	for i in `seq 1 10` `seq 101 110` `seq 1001 1010`; do
+		write_buffer $i $i
+	done
+
+	# add a little buffer
+	echo stop > trace_marker
+
+	# Check that the number of entries equals the id rounded up to a multiple of 4
+	awk '/.*: # [0-9a-f]* / {
+			print;
+			cnt = -1;
+			for (i = 0; i < NF; i++) {
+				# The counter is after the "#" marker
+				if ( $i == "#" ) {
+					i++;
+					cnt = strtonum("0x" $i);
+					num = NF - (i + 1);
+					# The number of items is always rounded up to a multiple of 4
+					cnt2 = int((cnt + 3) / 4) * 4;
+					if (cnt2 != num) {
+						exit 1;
+					}
+					break;
+				}
+			}
+		}
+	// { if (NR > 30) { exit 0; } } ' trace_pipe;
+}
+
+
+get_buffer_data_size() {
+	sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+test_buffer() {
+
+	# The id must be four bytes, test that 3 bytes fails a write
+	if echo -n abc > ./trace_marker_raw ; then
+		echo "Too small of write expected to fail but did not"
+		exit_fail
+	fi
+
+	size=`get_buffer_data_size`
+	echo size = $size
+
+	# Now add a little more than what it can handle
+
+	if write_buffer 0xdeadbeef $size ; then
+		echo "Too big of write expected to fail but did not"
+		exit_fail
+	fi
+}
+
+test_buffer
+test_multiple_writes
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
index 2506f464811b..47067a5e3cb0 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
@@ -28,25 +28,21 @@ test -d events/fprobes/myevent1
 test -d events/fprobes/myevent2
 
 echo 1 > events/fprobes/myevent1/enable
-# Make sure the event is attached and is the only one
+# Make sure the event is attached.
 grep -q $PLACE enabled_functions
 cnt=`cat enabled_functions | wc -l`
-if [ $cnt -ne $((ocnt + 1)) ]; then
+if [ $cnt -eq $ocnt ]; then
 	exit_fail
 fi
 
 echo 1 > events/fprobes/myevent2/enable
-# It should till be the only attached function
-cnt=`cat enabled_functions | wc -l`
-if [ $cnt -ne $((ocnt + 1)) ]; then
-	exit_fail
-fi
+cnt2=`cat enabled_functions | wc -l`
 
 echo 1 > events/fprobes/myevent3/enable
 # If the function is different, the attached function should be increased
 grep -q $PLACE2 enabled_functions
 cnt=`cat enabled_functions | wc -l`
-if [ $cnt -ne $((ocnt + 2)) ]; then
+if [ $cnt -eq $cnt2 ]; then
 	exit_fail
 fi
 
@@ -56,12 +52,6 @@ echo "-:myevent2" >> dynamic_events
 grep -q myevent1 dynamic_events
 ! grep -q myevent2 dynamic_events
 
-# should still have 2 left
-cnt=`cat enabled_functions | wc -l`
-if [ $cnt -ne $((ocnt + 2)) ]; then
-	exit_fail
-fi
-
 echo 0 > events/fprobes/enable
 echo > dynamic_events
 
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc
new file mode 100644
index 000000000000..c1f1cafa30f3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc
@@ -0,0 +1,40 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - enable/disable tracepoint probe events
+# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TRACEPOINT=sched_switch
+ENABLEFILE=events/tracepoints/myprobe/enable
+
+:;: "Add tracepoint event on $TRACEPOINT" ;:
+
+echo "t:myprobe ${TRACEPOINT}" >> dynamic_events
+
+:;: "Check enable/disable to ensure it works" ;:
+
+echo 1 > $ENABLEFILE
+
+grep -q $TRACEPOINT trace
+
+echo 0 > $ENABLEFILE
+
+echo > trace
+
+! grep -q $TRACEPOINT trace
+
+:;: "Repeat enable/disable to ensure it works" ;:
+
+echo 1 > $ENABLEFILE
+
+grep -q $TRACEPOINT trace
+
+echo 0 > $ENABLEFILE
+
+echo > trace
+
+! grep -q $TRACEPOINT trace
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index c62165fabd0c..cfa16aa1f39a 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -20,6 +20,10 @@ sample_events() {
 echo 0 > tracing_on
 echo 0 > events/enable
 
+# Clear functions caused by page cache; run sample_events twice
+sample_events
+sample_events
+
 echo "Get the most frequently calling function"
 echo > trace
 sample_events
diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py
index 5d2ffa3d5977..ece0ba8e7d34 100644
--- a/tools/testing/selftests/hid/tests/test_multitouch.py
+++ b/tools/testing/selftests/hid/tests/test_multitouch.py
@@ -1752,6 +1752,52 @@ class TestWin8TSConfidence(BaseTest.TestWin8Multitouch):
         assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
 
 
+    @pytest.mark.skip_if_uhdev(
+        lambda uhdev: "Confidence" not in uhdev.fields,
+        "Device not compatible, missing Confidence usage",
+    )
+    def test_mt_confidence_bad_multi_release(self):
+        """Check for the sticky finger being properly detected.
+
+        We first inject 3 fingers, then release only the second.
+        After 100 ms, we should receive a generated event about the
+        2 missing fingers being released.
+        """
+        uhdev = self.uhdev
+        evdev = uhdev.get_evdev()
+
+        # send 3 touches
+        t0 = Touch(1, 50, 10)
+        t1 = Touch(2, 150, 100)
+        t2 = Touch(3, 250, 200)
+        r = uhdev.event([t0, t1, t2])
+        events = uhdev.next_sync_events()
+        self.debug_reports(r, uhdev, events)
+
+        # release the second
+        t1.tipswitch = False
+        r = uhdev.event([t1])
+        events = uhdev.next_sync_events()
+        self.debug_reports(r, uhdev, events)
+
+        # only the second is released
+        assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1
+        assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+        assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1
+
+        # wait for the timer to kick in
+        time.sleep(0.2)
+
+        events = uhdev.next_sync_events()
+        self.debug_reports([], uhdev, events)
+
+        # now all 3 fingers are released
+        assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+        assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+        assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+        assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+
 class TestElanXPS9360(BaseTest.TestWin8Multitouch):
     def create_device(self):
         return Digitizer(
@@ -2086,3 +2132,12 @@ class Testsynaptics_06cb_ce08(BaseTest.TestPTP):
             input_info=(BusType.I2C, 0x06CB, 0xCE08),
             rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 01 09 02 a1 01 85 18 09 01 a1 00 05 09 19 01 29 03 46 00 00 15 00 25 01 75 01 95 03 81 02 95 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 06 00 ff 09 02 a1 01 85 20 09 01 a1 00 09 03 15 00 26 ff 00 35 00 46 ff 00 75 08 95 05 81 02 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 
85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 45 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 45 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
         )
+
+class Testsynaptics_06cb_ce26(TestWin8TSConfidence):
+    def create_device(self):
+        return PTP(
+            "uhid test synaptics_06cb_ce26",
+            max_contacts=5,
+            input_info=(BusType.I2C, 0x06CB, 0xCE26),
+            rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 45 05 75 10 55 0e 65 11 09 30 35 00 46 64 04 95 01 81 02 46 a2 02 26 29 03 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 3d 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 3d 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 
95 01 b1 02 c0",
+        )
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 3eebf5e3b974..bb4d33dde3c8 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -2638,6 +2638,8 @@ TEST_F(vfio_compat_mock_domain, map)
 	ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
 	ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
 	ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size);
+	/* Unmap of empty is success */
+	ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
 
 	/* UNMAP_FLAG_ALL requires 0 iova/size */
 	ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 772ca1db6e59..9f472c20c190 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -1044,8 +1044,8 @@ static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents)
 	};
 
 	while (nvevents--) {
-		if (!ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT),
-			    &trigger_vevent_cmd))
+		if (ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT),
+			  &trigger_vevent_cmd))
 			return -1;
 	}
 	return 0;
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 2c3c58e65a41..3a62039fa621 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -44,6 +44,12 @@ tap_timeout()
 	fi
 }
 
+report_failure()
+{
+	echo "not ok $*"	# TAP failure line; $* is "<num> <description> [# directive]"
+	[ -z "$kselftest_failures_file" ] || echo "$*" >> "$kselftest_failures_file"	# log only when a file was provided
+}
+
 run_one()
 {
 	DIR="$1"
@@ -105,7 +111,7 @@ run_one()
 	echo "# $TEST_HDR_MSG"
 	if [ ! -e "$TEST" ]; then
 		echo "# Warning: file $TEST is missing!"
-		echo "not ok $test_num $TEST_HDR_MSG"
+		report_failure "$test_num $TEST_HDR_MSG"
 	else
 		if [ -x /usr/bin/stdbuf ]; then
 			stdbuf="/usr/bin/stdbuf --output=L "
@@ -123,7 +129,7 @@ run_one()
 				interpreter=$(head -n 1 "$TEST" | cut -c 3-)
 				cmd="$stdbuf $interpreter ./$BASENAME_TEST"
 			else
-				echo "not ok $test_num $TEST_HDR_MSG"
+				report_failure "$test_num $TEST_HDR_MSG"
 				return
 			fi
 		fi
@@ -137,9 +143,9 @@ run_one()
 			echo "ok $test_num $TEST_HDR_MSG # SKIP"
 		elif [ $rc -eq $timeout_rc ]; then \
 			echo "#"
-			echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds"
+			report_failure "$test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds"
 		else
-			echo "not ok $test_num $TEST_HDR_MSG # exit=$rc"
+			report_failure "$test_num $TEST_HDR_MSG # exit=$rc"
 		fi)
 		cd - >/dev/null
 	fi
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
index 91906414a474..993c9e38e729 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
@@ -1020,7 +1020,7 @@ static void set_counter_defaults(void)
 {
 	const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600;
 	uint64_t freq = read_sysreg(CNTFRQ_EL0);
-	uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq);
+	int width = ilog2(MIN_ROLLOVER_SECS * freq);
 
 	width = clamp(width, 56, 64);
 	CVAL_MAX = GENMASK_ULL(width - 1, 0);
diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c
index 592b26ded779..d8fe17a6cc59 100644
--- a/tools/testing/selftests/kvm/arm64/external_aborts.c
+++ b/tools/testing/selftests/kvm/arm64/external_aborts.c
@@ -359,6 +359,44 @@ static void test_mmio_ease(void)
 	kvm_vm_free(vm);
 }
 
+static void test_serror_amo_guest(void)
+{
+	/*
+	 * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked)
+	 * since the write is redirected to memory. But don't write (intentionally)
+	 * broken code!
+	 */
+	sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0);
+	isb();
+
+	GUEST_SYNC(0);
+	GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+	/*
+	 * KVM treats the effective value of AMO as 1 when
+	 * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when
+	 * unmasked.
+	 */
+	local_serror_enable();
+	isb();
+	local_serror_disable();
+
+	GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror_amo(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest,
+							unexpected_dabt_handler);
+
+	vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+	vcpu_run_expect_sync(vcpu);
+	vcpu_inject_serror(vcpu);
+	vcpu_run_expect_done(vcpu);
+	kvm_vm_free(vm);
+}
+
 int main(void)
 {
 	test_mmio_abort();
@@ -369,4 +407,9 @@ int main(void)
 	test_serror_emulated();
 	test_mmio_ease();
 	test_s1ptw_abort();
+
+	if (!test_supports_el2())
+		return 0;
+
+	test_serror_amo();
 }
diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index 011fad95dd02..0a3a94c4cca1 100644
--- a/tools/testing/selftests/kvm/arm64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -63,8 +63,13 @@ static struct feature_id_reg feat_id_regs[] = {
 	REG_FEAT(HDFGWTR2_EL2,	ID_AA64MMFR0_EL1, FGT, FGT2),
 	REG_FEAT(ZCR_EL2,	ID_AA64PFR0_EL1, SVE, IMP),
 	REG_FEAT(SCTLR2_EL1,	ID_AA64MMFR3_EL1, SCTLRX, IMP),
+	REG_FEAT(SCTLR2_EL2,	ID_AA64MMFR3_EL1, SCTLRX, IMP),
 	REG_FEAT(VDISR_EL2,	ID_AA64PFR0_EL1, RAS, IMP),
 	REG_FEAT(VSESR_EL2,	ID_AA64PFR0_EL1, RAS, IMP),
+	REG_FEAT(VNCR_EL2,	ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY),
+	REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP),
+	REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP),
+	REG_FEAT(ZCR_EL2,	ID_AA64PFR0_EL1, SVE, IMP),
 };
 
 bool filter_reg(__u64 reg)
@@ -345,9 +350,20 @@ static __u64 base_regs[] = {
 	KVM_REG_ARM_FW_FEAT_BMAP_REG(1),	/* KVM_REG_ARM_STD_HYP_BMAP */
 	KVM_REG_ARM_FW_FEAT_BMAP_REG(2),	/* KVM_REG_ARM_VENDOR_HYP_BMAP */
 	KVM_REG_ARM_FW_FEAT_BMAP_REG(3),	/* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */
-	ARM64_SYS_REG(3, 3, 14, 3, 1),	/* CNTV_CTL_EL0 */
-	ARM64_SYS_REG(3, 3, 14, 3, 2),	/* CNTV_CVAL_EL0 */
-	ARM64_SYS_REG(3, 3, 14, 0, 2),
+
+	/*
+	 * EL0 Virtual Timer Registers
+	 *
+	 * WARNING:
+	 * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined
+	 * with the appropriate register encodings.  Their values have been
+	 * accidentally swapped.  As this is established API, the definitions
+	 * here must be used, rather than ones derived from the encodings.
+	 */
+	KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0),
+	KVM_REG_ARM_TIMER_CVAL,
+	KVM_REG_ARM_TIMER_CNT,
+
 	ARM64_SYS_REG(3, 0, 0, 0, 0),	/* MIDR_EL1 */
 	ARM64_SYS_REG(3, 0, 0, 0, 6),	/* REVIDR_EL1 */
 	ARM64_SYS_REG(3, 1, 0, 0, 1),	/* CLIDR_EL1 */
@@ -704,6 +720,7 @@ static __u64 el2_regs[] = {
 	SYS_REG(VMPIDR_EL2),
 	SYS_REG(SCTLR_EL2),
 	SYS_REG(ACTLR_EL2),
+	SYS_REG(SCTLR2_EL2),
 	SYS_REG(HCR_EL2),
 	SYS_REG(MDCR_EL2),
 	SYS_REG(CPTR_EL2),
@@ -755,6 +772,10 @@ static __u64 el2_regs[] = {
 	SYS_REG(VSESR_EL2),
 };
 
+static __u64 el2_e2h0_regs[] = {
+	/* Empty */
+};
+
 #define BASE_SUBLIST \
 	{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
 #define VREGS_SUBLIST \
@@ -789,6 +810,15 @@ static __u64 el2_regs[] = {
 		.regs		= el2_regs,			\
 		.regs_n		= ARRAY_SIZE(el2_regs),		\
 	}
+#define EL2_E2H0_SUBLIST					\
+	EL2_SUBLIST,						\
+	{							\
+		.name 		= "EL2 E2H0",			\
+		.capability	= KVM_CAP_ARM_EL2_E2H0,		\
+		.feature	= KVM_ARM_VCPU_HAS_EL2_E2H0,	\
+		.regs		= el2_e2h0_regs,		\
+		.regs_n		= ARRAY_SIZE(el2_e2h0_regs),	\
+	}
 
 static struct vcpu_reg_list vregs_config = {
 	.sublists = {
@@ -897,6 +927,65 @@ static struct vcpu_reg_list el2_pauth_pmu_config = {
 	},
 };
 
+static struct vcpu_reg_list el2_e2h0_vregs_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	EL2_E2H0_SUBLIST,
+	VREGS_SUBLIST,
+	{0},
+	},
+};
+
+static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	EL2_E2H0_SUBLIST,
+	VREGS_SUBLIST,
+	PMU_SUBLIST,
+	{0},
+	},
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	EL2_E2H0_SUBLIST,
+	SVE_SUBLIST,
+	{0},
+	},
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_pmu_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	EL2_E2H0_SUBLIST,
+	SVE_SUBLIST,
+	PMU_SUBLIST,
+	{0},
+	},
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	EL2_E2H0_SUBLIST,
+	VREGS_SUBLIST,
+	PAUTH_SUBLIST,
+	{0},
+	},
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	EL2_E2H0_SUBLIST,
+	VREGS_SUBLIST,
+	PAUTH_SUBLIST,
+	PMU_SUBLIST,
+	{0},
+	},
+};
+
 struct vcpu_reg_list *vcpu_configs[] = {
 	&vregs_config,
 	&vregs_pmu_config,
@@ -911,5 +1000,12 @@ struct vcpu_reg_list *vcpu_configs[] = {
 	&el2_sve_pmu_config,
 	&el2_pauth_config,
 	&el2_pauth_pmu_config,
+
+	&el2_e2h0_vregs_config,
+	&el2_e2h0_vregs_pmu_config,
+	&el2_e2h0_sve_config,
+	&el2_e2h0_sve_pmu_config,
+	&el2_e2h0_pauth_config,
+	&el2_e2h0_pauth_pmu_config,
 };
 int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 8ff1e853f7f8..c4815d365816 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -249,11 +249,14 @@ static void guest_code(void)
 	GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
 	GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1);
 	GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+	GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1);
 	GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
 	GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
 	GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
 	GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1);
 	GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+	GUEST_REG_SYNC(SYS_MPIDR_EL1);
+	GUEST_REG_SYNC(SYS_CLIDR_EL1);
 	GUEST_REG_SYNC(SYS_CTR_EL0);
 	GUEST_REG_SYNC(SYS_MIDR_EL1);
 	GUEST_REG_SYNC(SYS_REVIDR_EL1);
@@ -265,7 +268,9 @@ static void guest_code(void)
 /* Return a safe value to a given ftr_bits an ftr value */
 uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
 {
-	uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+	uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+	TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
 
 	if (ftr_bits->sign == FTR_UNSIGNED) {
 		switch (ftr_bits->type) {
@@ -317,7 +322,9 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
 /* Return an invalid value to a given ftr_bits an ftr value */
 uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
 {
-	uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+	uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+	TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
 
 	if (ftr_bits->sign == FTR_UNSIGNED) {
 		switch (ftr_bits->type) {
@@ -669,7 +676,7 @@ static void test_clidr(struct kvm_vcpu *vcpu)
 	clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
 
 	/* find the first empty level in the cache hierarchy */
-	for (level = 1; level < 7; level++) {
+	for (level = 1; level <= 7; level++) {
 		if (!CLIDR_CTYPE(clidr, level))
 			break;
 	}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
index 87922a89b134..687d04463983 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
@@ -123,6 +123,7 @@ static void guest_setup_gic(void)
 static void guest_code(size_t nr_lpis)
 {
 	guest_setup_gic();
+	local_irq_enable();
 
 	GUEST_SYNC(0);
 
@@ -331,7 +332,7 @@ static void setup_vm(void)
 {
 	int i;
 
-	vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
+	vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
 	TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
 
 	vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index b3ca6737f304..e7d9aeb418d3 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -14,8 +14,6 @@
 #include <linux/bitmap.h>
 #include <linux/falloc.h>
 #include <linux/sizes.h>
-#include <setjmp.h>
-#include <signal.h>
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -24,7 +22,9 @@
 #include "test_util.h"
 #include "ucall_common.h"
 
-static void test_file_read_write(int fd)
+static size_t page_size;
+
+static void test_file_read_write(int fd, size_t total_size)
 {
 	char buf[64];
 
@@ -38,18 +38,22 @@ static void test_file_read_write(int fd)
 		    "pwrite on a guest_mem fd should fail");
 }
 
-static void test_mmap_supported(int fd, size_t page_size, size_t total_size)
+static void test_mmap_cow(int fd, size_t size)
+{
+	/* A private (copy-on-write) mapping of the fd is expected to be refused. */
+	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+
+	TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
+}
+
+static void test_mmap_supported(int fd, size_t total_size)
 {
 	const char val = 0xaa;
 	char *mem;
 	size_t i;
 	int ret;
 
-	mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-	TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
-
-	mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed.");
+	mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
 
 	memset(mem, val, total_size);
 	for (i = 0; i < total_size; i++)
@@ -68,45 +72,37 @@ static void test_mmap_supported(int fd, size_t page_size, size_t total_size)
 	for (i = 0; i < total_size; i++)
 		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
 
-	ret = munmap(mem, total_size);
-	TEST_ASSERT(!ret, "munmap() should succeed.");
-}
-
-static sigjmp_buf jmpbuf;
-void fault_sigbus_handler(int signum)
-{
-	siglongjmp(jmpbuf, 1);
+	kvm_munmap(mem, total_size);
 }
 
-static void test_fault_overflow(int fd, size_t page_size, size_t total_size)
+static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
 {
-	struct sigaction sa_old, sa_new = {
-		.sa_handler = fault_sigbus_handler,
-	};
-	size_t map_size = total_size * 4;
 	const char val = 0xaa;
 	char *mem;
 	size_t i;
-	int ret;
 
-	mem = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed.");
+	mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
 
-	sigaction(SIGBUS, &sa_new, &sa_old);
-	if (sigsetjmp(jmpbuf, 1) == 0) {
-		memset(mem, 0xaa, map_size);
-		TEST_ASSERT(false, "memset() should have triggered SIGBUS.");
-	}
-	sigaction(SIGBUS, &sa_old, NULL);
+	TEST_EXPECT_SIGBUS(memset(mem, val, map_size));
+	TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));
 
-	for (i = 0; i < total_size; i++)
+	for (i = 0; i < accessible_size; i++)
 		TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
 
-	ret = munmap(mem, map_size);
-	TEST_ASSERT(!ret, "munmap() should succeed.");
+	kvm_munmap(mem, map_size);
+}
+
+static void test_fault_overflow(int fd, size_t total_size)
+{
+	test_fault_sigbus(fd, total_size, total_size * 4);
+}
+
+static void test_fault_private(int fd, size_t total_size)
+{
+	test_fault_sigbus(fd, 0, total_size);
 }
 
-static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size)
+static void test_mmap_not_supported(int fd, size_t total_size)
 {
 	char *mem;
 
@@ -117,7 +113,7 @@ static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size)
 	TEST_ASSERT_EQ(mem, MAP_FAILED);
 }
 
-static void test_file_size(int fd, size_t page_size, size_t total_size)
+static void test_file_size(int fd, size_t total_size)
 {
 	struct stat sb;
 	int ret;
@@ -128,7 +124,7 @@ static void test_file_size(int fd, size_t page_size, size_t total_size)
 	TEST_ASSERT_EQ(sb.st_blksize, page_size);
 }
 
-static void test_fallocate(int fd, size_t page_size, size_t total_size)
+static void test_fallocate(int fd, size_t total_size)
 {
 	int ret;
 
@@ -165,7 +161,7 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size)
 	TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
 }
 
-static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
+static void test_invalid_punch_hole(int fd, size_t total_size)
 {
 	struct {
 		off_t offset;
@@ -196,8 +192,7 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
 }
 
 static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
-						  uint64_t guest_memfd_flags,
-						  size_t page_size)
+						  uint64_t guest_memfd_flags)
 {
 	size_t size;
 	int fd;
@@ -214,7 +209,6 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
 {
 	int fd1, fd2, ret;
 	struct stat st1, st2;
-	size_t page_size = getpagesize();
 
 	fd1 = __vm_create_guest_memfd(vm, page_size, 0);
 	TEST_ASSERT(fd1 != -1, "memfd creation should succeed");
@@ -239,9 +233,9 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
 	close(fd1);
 }
 
-static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags)
+static void test_guest_memfd_flags(struct kvm_vm *vm)
 {
-	size_t page_size = getpagesize();
+	uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
 	uint64_t flag;
 	int fd;
 
@@ -260,43 +254,57 @@ static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags)
 	}
 }
 
-static void test_guest_memfd(unsigned long vm_type)
+#define gmem_test(__test, __vm, __flags)				\
+do {									\
+	int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags);	\
+									\
+	test_##__test(fd, page_size * 4);				\
+	close(fd);							\
+} while (0)
+
+static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
 {
-	uint64_t flags = 0;
-	struct kvm_vm *vm;
-	size_t total_size;
-	size_t page_size;
-	int fd;
+	test_create_guest_memfd_multiple(vm);
+	test_create_guest_memfd_invalid_sizes(vm, flags);
 
-	page_size = getpagesize();
-	total_size = page_size * 4;
+	gmem_test(file_read_write, vm, flags);
 
-	vm = vm_create_barebones_type(vm_type);
+	if (flags & GUEST_MEMFD_FLAG_MMAP) {
+		if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
+			gmem_test(mmap_supported, vm, flags);
+			gmem_test(fault_overflow, vm, flags);
+		} else {
+			gmem_test(fault_private, vm, flags);
+		}
 
-	if (vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP))
-		flags |= GUEST_MEMFD_FLAG_MMAP;
+		gmem_test(mmap_cow, vm, flags);
+	} else {
+		gmem_test(mmap_not_supported, vm, flags);
+	}
 
-	test_create_guest_memfd_multiple(vm);
-	test_create_guest_memfd_invalid_sizes(vm, flags, page_size);
+	gmem_test(file_size, vm, flags);
+	gmem_test(fallocate, vm, flags);
+	gmem_test(invalid_punch_hole, vm, flags);
+}
 
-	fd = vm_create_guest_memfd(vm, total_size, flags);
+static void test_guest_memfd(unsigned long vm_type)
+{
+	struct kvm_vm *vm = vm_create_barebones_type(vm_type);
+	uint64_t flags;
 
-	test_file_read_write(fd);
+	test_guest_memfd_flags(vm);
 
-	if (flags & GUEST_MEMFD_FLAG_MMAP) {
-		test_mmap_supported(fd, page_size, total_size);
-		test_fault_overflow(fd, page_size, total_size);
-	} else {
-		test_mmap_not_supported(fd, page_size, total_size);
-	}
+	__test_guest_memfd(vm, 0);
 
-	test_file_size(fd, page_size, total_size);
-	test_fallocate(fd, page_size, total_size);
-	test_invalid_punch_hole(fd, page_size, total_size);
+	flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
+	if (flags & GUEST_MEMFD_FLAG_MMAP)
+		__test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);
 
-	test_guest_memfd_flags(vm, flags);
+	/* MMAP should always be supported if INIT_SHARED is supported. */
+	if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)
+		__test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |
+				       GUEST_MEMFD_FLAG_INIT_SHARED);
 
-	close(fd);
 	kvm_vm_free(vm);
 }
 
@@ -328,22 +336,26 @@ static void test_guest_memfd_guest(void)
 	size_t size;
 	int fd, i;
 
-	if (!kvm_has_cap(KVM_CAP_GUEST_MEMFD_MMAP))
+	if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))
 		return;
 
 	vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);
 
-	TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP),
-		    "Default VM type should always support guest_memfd mmap()");
+	TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,
+		    "Default VM type should support MMAP, supported flags = 0x%x",
+		    vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
+	TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,
+		    "Default VM type should support INIT_SHARED, supported flags = 0x%x",
+		    vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
 
 	size = vm->page_size;
-	fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP);
+	fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
+					     GUEST_MEMFD_FLAG_INIT_SHARED);
 	vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);
 
-	mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed");
+	mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
 	memset(mem, 0xaa, size);
-	munmap(mem, size);
+	kvm_munmap(mem, size);
 
 	virt_pg_map(vm, gpa, gpa);
 	vcpu_args_set(vcpu, 2, gpa, size);
@@ -351,8 +363,7 @@ static void test_guest_memfd_guest(void)
 
 	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
 
-	mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed");
+	mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
 	for (i = 0; i < size; i++)
 		TEST_ASSERT_EQ(mem[i], 0xff);
 
@@ -366,6 +377,8 @@ int main(int argc, char *argv[])
 
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
 
+	page_size = getpagesize();
+
 	/*
 	 * Not all architectures support KVM_CAP_VM_TYPES. However, those that
 	 * support guest_memfd have that support for the default VM type.
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 6f481475c135..ff928716574d 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -305,7 +305,17 @@ void test_wants_mte(void);
 void test_disable_default_vgic(void);
 
 bool vm_supports_el2(struct kvm_vm *vm);
-static bool vcpu_has_el2(struct kvm_vcpu *vcpu)
+
+static inline bool test_supports_el2(void)
+{
+	struct kvm_vm *probe_vm = vm_create(1);	/* throwaway VM, used only for the query */
+	const bool has_el2 = vm_supports_el2(probe_vm);
+
+	kvm_vm_free(probe_vm);
+	return has_el2;
+}
+
+static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu)
 {
 	return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
 }
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 26cc30290e76..d3f3e455c031 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -286,6 +286,31 @@ static inline bool kvm_has_cap(long cap)
 #define __KVM_SYSCALL_ERROR(_name, _ret) \
 	"%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
 
+static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd,
+			       off_t offset)
+{
+	/* No address hint is passed; a MAP_FAILED result trips the assertion. */
+	void *mem = mmap(NULL, size, prot, flags, fd, offset);
+
+	TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()",
+		    (int)(unsigned long)MAP_FAILED));
+
+	return mem;
+}
+
+static inline void *kvm_mmap(size_t size, int prot, int flags, int fd)
+{
+	return __kvm_mmap(size, prot, flags, fd, 0);	/* same as __kvm_mmap() at file offset 0 */
+}
+
+static inline void kvm_munmap(void *mem, size_t size)
+{
+	/* Unmap @size bytes at @mem and assert that munmap() returned 0. */
+	int ret = munmap(mem, size);
+
+	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+}
+
 /*
  * Use the "inner", double-underscore macro when reporting errors from within
  * other macros so that the name of ioctl() and not its literal numeric value
@@ -1273,4 +1298,6 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
 
 uint32_t guest_get_vcpuid(void);
 
+bool kvm_arch_has_default_irqchip(void);
+
 #endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index c6ef895fbd9a..b4872ba8ed12 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -8,6 +8,8 @@
 #ifndef SELFTEST_KVM_TEST_UTIL_H
 #define SELFTEST_KVM_TEST_UTIL_H
 
+#include <setjmp.h>
+#include <signal.h>
 #include <stdlib.h>
 #include <stdarg.h>
 #include <stdbool.h>
@@ -78,6 +80,23 @@ do {									\
 	__builtin_unreachable(); \
 } while (0)
 
+extern sigjmp_buf expect_sigbus_jmpbuf;
+void expect_sigbus_handler(int signum);
+
+#define TEST_EXPECT_SIGBUS(action)						\
+do {										\
+	struct sigaction sa_old, sa_new = {					\
+		.sa_handler = expect_sigbus_handler,				\
+	};									\
+										\
+	sigaction(SIGBUS, &sa_new, &sa_old);					\
+	if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) {				\
+		action;								\
+		TEST_FAIL("'%s' should have triggered SIGBUS", #action);	\
+	}									\
+	sigaction(SIGBUS, &sa_old, NULL);					\
+} while (0)
+
 size_t parse_size(const char *size);
 
 int64_t timespec_to_ns(struct timespec ts);
diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c
index 7c301b4c7005..5d7590d01868 100644
--- a/tools/testing/selftests/kvm/irqfd_test.c
+++ b/tools/testing/selftests/kvm/irqfd_test.c
@@ -89,11 +89,19 @@ static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd)
 int main(int argc, char *argv[])
 {
 	pthread_t racing_thread;
+	struct kvm_vcpu *unused;
 	int r, i;
 
-	/* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */
-	vm1 = vm_create(1);
-	vm2 = vm_create(1);
+	TEST_REQUIRE(kvm_arch_has_default_irqchip());
+
+	/*
+	 * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also
+	 * create an unused vCPU as certain architectures (like arm64) need to
+	 * complete IRQ chip initialization after all possible vCPUs for a VM
+	 * have been created.
+	 */
+	vm1 = vm_create_with_one_vcpu(&unused, NULL);
+	vm2 = vm_create_with_one_vcpu(&unused, NULL);
 
 	WRITE_ONCE(__eventfd, kvm_new_eventfd());
 
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
index 09f270545646..0e2f8ed90f30 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
@@ -15,6 +15,8 @@
 #include "gic_v3.h"
 #include "processor.h"
 
+#define GITS_COLLECTION_TARGET_SHIFT 16
+
 static u64 its_read_u64(unsigned long offset)
 {
 	return readq_relaxed(GITS_BASE_GVA + offset);
@@ -163,6 +165,11 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
 	its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
 }
 
+static u64 procnum_to_rdbase(u32 vcpu_id)	/* vCPU id -> target value handed to its_encode_target() for MAPC */
+{
+	return (u64)vcpu_id << GITS_COLLECTION_TARGET_SHIFT;	/* widen first: a u32 shift would drop high bits */
+}
+
 #define GITS_CMDQ_POLL_ITERATIONS	0
 
 static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
@@ -217,7 +224,7 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
 
 	its_encode_cmd(&cmd, GITS_CMD_MAPC);
 	its_encode_collection(&cmd, collection_id);
-	its_encode_target(&cmd, vcpu_id);
+	its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
 	its_encode_valid(&cmd, valid);
 
 	its_send_cmd(cmdq_base, &cmd);
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 369a4c87dd8f..54f6d17c78f7 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -725,3 +725,8 @@ void kvm_arch_vm_release(struct kvm_vm *vm)
 	if (vm->arch.has_gic)
 		close(vm->arch.gic_fd);
 }
+
+bool kvm_arch_has_default_irqchip(void)
+{
+	return request_vgic && kvm_supports_vgic_v3();
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 6743fbd9bd67..1a93d6361671 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -741,13 +741,11 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
 	int ret;
 
 	if (vcpu->dirty_gfns) {
-		ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
-		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+		kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
 		vcpu->dirty_gfns = NULL;
 	}
 
-	ret = munmap(vcpu->run, vcpu_mmap_sz());
-	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+	kvm_munmap(vcpu->run, vcpu_mmap_sz());
 
 	ret = close(vcpu->fd);
 	TEST_ASSERT(!ret,  __KVM_SYSCALL_ERROR("close()", ret));
@@ -783,20 +781,16 @@ void kvm_vm_release(struct kvm_vm *vmp)
 static void __vm_mem_region_delete(struct kvm_vm *vm,
 				   struct userspace_mem_region *region)
 {
-	int ret;
-
 	rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
 	rb_erase(&region->hva_node, &vm->regions.hva_tree);
 	hash_del(&region->slot_node);
 
 	sparsebit_free(&region->unused_phy_pages);
 	sparsebit_free(&region->protected_phy_pages);
-	ret = munmap(region->mmap_start, region->mmap_size);
-	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+	kvm_munmap(region->mmap_start, region->mmap_size);
 	if (region->fd >= 0) {
 		/* There's an extra map when using shared memory. */
-		ret = munmap(region->mmap_alias, region->mmap_size);
-		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+		kvm_munmap(region->mmap_alias, region->mmap_size);
 		close(region->fd);
 	}
 	if (region->region.guest_memfd >= 0)
@@ -1053,12 +1047,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
 		region->fd = kvm_memfd_alloc(region->mmap_size,
 					     src_type == VM_MEM_SRC_SHARED_HUGETLB);
 
-	region->mmap_start = mmap(NULL, region->mmap_size,
-				  PROT_READ | PROT_WRITE,
-				  vm_mem_backing_src_alias(src_type)->flag,
-				  region->fd, 0);
-	TEST_ASSERT(region->mmap_start != MAP_FAILED,
-		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+	region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
+				      vm_mem_backing_src_alias(src_type)->flag,
+				      region->fd);
 
 	TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
 		    region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
@@ -1129,12 +1120,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
 
 	/* If shared memory, create an alias. */
 	if (region->fd >= 0) {
-		region->mmap_alias = mmap(NULL, region->mmap_size,
-					  PROT_READ | PROT_WRITE,
-					  vm_mem_backing_src_alias(src_type)->flag,
-					  region->fd, 0);
-		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
-			    __KVM_SYSCALL_ERROR("mmap()",  (int)(unsigned long)MAP_FAILED));
+		region->mmap_alias = kvm_mmap(region->mmap_size,
+					      PROT_READ | PROT_WRITE,
+					      vm_mem_backing_src_alias(src_type)->flag,
+					      region->fd);
 
 		/* Align host alias address */
 		region->host_alias = align_ptr_up(region->mmap_alias, alignment);
@@ -1344,10 +1333,8 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
 	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
 		"smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi",
 		vcpu_mmap_sz(), sizeof(*vcpu->run));
-	vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
-		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
-	TEST_ASSERT(vcpu->run != MAP_FAILED,
-		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+	vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE,
+			     MAP_SHARED, vcpu->fd);
 
 	if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
 		vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
@@ -1794,9 +1781,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
 			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
 		TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
 
-		addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
-			    page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
-		TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
+		addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
+				  page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
 
 		vcpu->dirty_gfns = addr;
 		vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
@@ -2344,3 +2330,8 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
 	pg = paddr >> vm->page_shift;
 	return sparsebit_is_set(region->protected_phy_pages, pg);
 }
+
+__weak bool kvm_arch_has_default_irqchip(void)	/* weak fallback; arm64, s390 and x86 each define their own */
+{
+	return false;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
index 20cfe970e3e3..8ceeb17c819a 100644
--- a/tools/testing/selftests/kvm/lib/s390/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390/processor.c
@@ -221,3 +221,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
 void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
 {
 }
+
+bool kvm_arch_has_default_irqchip(void)
+{
+	return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 03eb99af9b8d..8a1848586a85 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -18,6 +18,13 @@
 
 #include "test_util.h"
 
+sigjmp_buf expect_sigbus_jmpbuf;
+
+void __attribute__((used)) expect_sigbus_handler(int signum)	/* installed for SIGBUS by TEST_EXPECT_SIGBUS() */
+{
+	siglongjmp(expect_sigbus_jmpbuf, 1);	/* jump back to the macro's sigsetjmp(), which then returns 1 */
+}
+
 /*
  * Random number generator that is usable from guest code. This is the
  * Park-Miller LCG using standard constants.
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index c748cd9b2eef..b418502c5ecc 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -1318,3 +1318,8 @@ bool sys_clocksource_is_based_on_tsc(void)
 
 	return ret;
 }
+
+bool kvm_arch_has_default_irqchip(void)
+{
+	return true;
+}
diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index 6a437d2be9fa..37b7e6524533 100644
--- a/tools/testing/selftests/kvm/mmu_stress_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -339,8 +339,7 @@ int main(int argc, char *argv[])
 	TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
 
 	fd = kvm_memfd_alloc(slot_size, hugepages);
-	mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+	mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
 
 	TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed");
 
@@ -413,7 +412,7 @@ int main(int argc, char *argv[])
 	for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2)
 		vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL);
 
-	munmap(mem, slot_size / 2);
+	kvm_munmap(mem, slot_size / 2);
 
 	/* Sanity check that the vCPUs actually ran. */
 	for (i = 0; i < nr_vcpus; i++)
diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
index 0350a8896a2f..f04768c1d2e4 100644
--- a/tools/testing/selftests/kvm/pre_fault_memory_test.c
+++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
@@ -10,6 +10,7 @@
 #include <test_util.h>
 #include <kvm_util.h>
 #include <processor.h>
+#include <pthread.h>
 
 /* Arbitrarily chosen values */
 #define TEST_SIZE		(SZ_2M + PAGE_SIZE)
@@ -30,18 +31,66 @@ static void guest_code(uint64_t base_gpa)
 	GUEST_DONE();
 }
 
-static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
-			     u64 left)
+struct slot_worker_data {
+	struct kvm_vm *vm;
+	u64 gpa;
+	uint32_t flags;
+	bool worker_ready;
+	bool prefault_ready;
+	bool recreate_slot;
+};
+
+static void *delete_slot_worker(void *__data)	/* pthread entry; __data points at a struct slot_worker_data */
+{
+	struct slot_worker_data *data = __data;
+	struct kvm_vm *vm = data->vm;
+
+	WRITE_ONCE(data->worker_ready, true);	/* pre_fault_memory() spins on this before it starts */
+
+	while (!READ_ONCE(data->prefault_ready))	/* wait until the prefault side is about to run */
+		cpu_relax();
+
+	vm_mem_region_delete(vm, TEST_SLOT);	/* intended to race with KVM_PRE_FAULT_MEMORY */
+
+	while (!READ_ONCE(data->recreate_slot))	/* wait for the request to put the slot back */
+		cpu_relax();
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa,
+				    TEST_SLOT, TEST_NPAGES, data->flags);
+
+	return NULL;
+}
+
+static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset,
+			     u64 size, u64 expected_left, bool private)
 {
 	struct kvm_pre_fault_memory range = {
-		.gpa = gpa,
+		.gpa = base_gpa + offset,
 		.size = size,
 		.flags = 0,
 	};
-	u64 prev;
+	struct slot_worker_data data = {
+		.vm = vcpu->vm,
+		.gpa = base_gpa,
+		.flags = private ? KVM_MEM_GUEST_MEMFD : 0,
+	};
+	bool slot_recreated = false;
+	pthread_t slot_worker;
 	int ret, save_errno;
+	u64 prev;
+
+	/*
+	 * Concurrently delete (and recreate) the slot to test KVM's handling
+	 * of a racing memslot deletion with prefaulting.
+	 */
+	pthread_create(&slot_worker, NULL, delete_slot_worker, &data);
 
-	do {
+	while (!READ_ONCE(data.worker_ready))
+		cpu_relax();
+
+	WRITE_ONCE(data.prefault_ready, true);
+
+	for (;;) {
 		prev = range.size;
 		ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
 		save_errno = errno;
@@ -49,18 +98,65 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
 			    "%sexpecting range.size to change on %s",
 			    ret < 0 ? "not " : "",
 			    ret < 0 ? "failure" : "success");
-	} while (ret >= 0 ? range.size : save_errno == EINTR);
 
-	TEST_ASSERT(range.size == left,
-		    "Completed with %lld bytes left, expected %" PRId64,
-		    range.size, left);
+		/*
+		 * Immediately retry prefaulting if KVM was interrupted by an
+		 * unrelated signal/event.
+		 */
+		if (ret < 0 && save_errno == EINTR)
+			continue;
+
+		/*
+		 * Tell the worker to recreate the slot in order to complete
+		 * prefaulting (if prefault didn't already succeed before the
+		 * slot was deleted) and/or to prepare for the next testcase.
+		 * Wait for the worker to exit so that the next invocation of
+		 * prefaulting is guaranteed to complete (assuming no KVM bugs).
+		 */
+		if (!slot_recreated) {
+			WRITE_ONCE(data.recreate_slot, true);
+			pthread_join(slot_worker, NULL);
+			slot_recreated = true;
+
+			/*
+			 * Retry prefaulting to get a stable result, i.e. to
+			 * avoid seeing random EAGAIN failures.  Don't retry if
+			 * prefaulting already succeeded, as KVM disallows
+			 * prefaulting with size=0, i.e. blindly retrying would
+			 * result in test failures due to EINVAL.  KVM should
+			 * always return success if all bytes are prefaulted,
+			 * i.e. there is no need to guard against EAGAIN being
+			 * returned.
+			 */
+			if (range.size)
+				continue;
+		}
+
+		/*
+		 * All done if there are no remaining bytes to prefault, or if
+		 * prefaulting failed (EINTR was handled above, and EAGAIN due
+		 * to prefaulting a memslot that's being actively deleted should
+		 * be impossible since the memslot has already been recreated).
+		 */
+		if (!range.size || ret < 0)
+			break;
+	}
 
-	if (left == 0)
-		__TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+	TEST_ASSERT(range.size == expected_left,
+		    "Completed with %llu bytes left, expected %lu",
+		    range.size, expected_left);
+
+	/*
+	 * Assert success if prefaulting the entire range should succeed, i.e.
+	 * complete with no bytes remaining.  Otherwise prefaulting should have
+	 * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when
+	 * no memslot exists).
+	 */
+	if (!expected_left)
+		TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
 	else
-		/* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */
-		__TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
-					    "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+		TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
+					  KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
 }
 
 static void __test_pre_fault_memory(unsigned long vm_type, bool private)
@@ -97,9 +193,10 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
 
 	if (private)
 		vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE);
-	pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0);
-	pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE);
-	pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE);
+
+	pre_fault_memory(vcpu, guest_test_phys_mem, 0, SZ_2M, 0, private);
+	pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
+	pre_fault_memory(vcpu, guest_test_phys_mem, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
 
 	vcpu_args_set(vcpu, 1, guest_test_virt_mem);
 	vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
index d265b34c54be..50bc1c38225a 100644
--- a/tools/testing/selftests/kvm/s390/ucontrol_test.c
+++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c
@@ -142,19 +142,17 @@ FIXTURE_SETUP(uc_kvm)
 	self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
 	ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
 		  TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
-	self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
-		    PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
-	ASSERT_NE(self->run, MAP_FAILED);
+	self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE,
+			     MAP_SHARED, self->vcpu_fd);
 	/**
 	 * For virtual cpus that have been created with S390 user controlled
 	 * virtual machines, the resulting vcpu fd can be memory mapped at page
 	 * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
 	 * the virtual cpu's hardware control block.
 	 */
-	self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
-			  PROT_READ | PROT_WRITE, MAP_SHARED,
-			  self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
-	ASSERT_NE(self->sie_block, MAP_FAILED);
+	self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE,
+				     MAP_SHARED, self->vcpu_fd,
+				     KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
 
 	TH_LOG("VM created %p %p", self->run, self->sie_block);
 
@@ -186,8 +184,8 @@ FIXTURE_SETUP(uc_kvm)
 
 FIXTURE_TEARDOWN(uc_kvm)
 {
-	munmap(self->sie_block, PAGE_SIZE);
-	munmap(self->run, self->kvm_run_size);
+	kvm_munmap(self->sie_block, PAGE_SIZE);
+	kvm_munmap(self->run, self->kvm_run_size);
 	close(self->vcpu_fd);
 	close(self->vm_fd);
 	close(self->kvm_fd);
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index ce3ac0fd6dfb..7fe427ff9b38 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -433,10 +433,10 @@ static void test_add_max_memory_regions(void)
 	pr_info("Adding slots 0..%i, each memory region with %dK size\n",
 		(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
 
-	mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
-		   PROT_READ | PROT_WRITE,
-		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
-	TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+
+	mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
+		       PROT_READ | PROT_WRITE,
+		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1);
 	mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
 
 	for (slot = 0; slot < max_mem_slots; slot++)
@@ -446,9 +446,8 @@ static void test_add_max_memory_regions(void)
 					  mem_aligned + (uint64_t)slot * MEM_REGION_SIZE);
 
 	/* Check it cannot be added memory slots beyond the limit */
-	mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
-			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host");
+	mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+			     MAP_PRIVATE | MAP_ANONYMOUS, -1);
 
 	ret = __vm_set_user_memory_region(vm, max_mem_slots, 0,
 					  (uint64_t)max_mem_slots * MEM_REGION_SIZE,
@@ -456,8 +455,8 @@ static void test_add_max_memory_regions(void)
 	TEST_ASSERT(ret == -1 && errno == EINVAL,
 		    "Adding one more memory slot should fail with EINVAL");
 
-	munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
-	munmap(mem_extra, MEM_REGION_SIZE);
+	kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
+	kvm_munmap(mem_extra, MEM_REGION_SIZE);
 	kvm_vm_free(vm);
 }
 
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 46991a029f7c..8ec0cb64ad94 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -10,7 +10,11 @@ SYSFS_KERNEL_DIR="/sys/kernel"
 SYSFS_KLP_DIR="$SYSFS_KERNEL_DIR/livepatch"
 SYSFS_DEBUG_DIR="$SYSFS_KERNEL_DIR/debug"
 SYSFS_KPROBES_DIR="$SYSFS_DEBUG_DIR/kprobes"
-SYSFS_TRACING_DIR="$SYSFS_DEBUG_DIR/tracing"
+if [[ -e /sys/kernel/tracing/trace ]]; then
+	SYSFS_TRACING_DIR="$SYSFS_KERNEL_DIR/tracing"
+else
+	SYSFS_TRACING_DIR="$SYSFS_DEBUG_DIR/tracing"
+fi
 
 # Kselftest framework requirement - SKIP code is 4
 ksft_skip=4
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 9e3be2ee7f1b..f917b4c4c943 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -1758,10 +1758,15 @@ int main(int argc, char *argv[])
 			uffd_test_ops = mem_type->mem_ops;
 			uffd_test_case_ops = test->test_case_ops;
 
-			if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
+			if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) {
 				gopts.page_size = default_huge_page_size();
-			else
+				if (gopts.page_size == 0) {
+					uffd_test_skip("huge page size is 0, feature missing?");
+					continue;
+				}
+			} else {
 				gopts.page_size = psize();
+			}
 
 			/* Ensure we have at least 2 pages */
 			gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2)
@@ -1776,12 +1781,6 @@ int main(int argc, char *argv[])
 				continue;
 
 			uffd_test_start("%s on %s", test->name, mem_type->name);
-			if ((mem_type->mem_flag == MEM_HUGETLB ||
-			    mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
-			    (default_huge_page_size() == 0)) {
-				uffd_test_skip("huge page size is 0, feature missing?");
-				continue;
-			}
 			if (!uffd_feature_supported(test)) {
 				uffd_test_skip("feature missing");
 				continue;
diff --git a/tools/testing/selftests/namespaces/.gitignore b/tools/testing/selftests/namespaces/.gitignore
index ccfb40837a73..0989e80da457 100644
--- a/tools/testing/selftests/namespaces/.gitignore
+++ b/tools/testing/selftests/namespaces/.gitignore
@@ -1,3 +1,12 @@
 nsid_test
 file_handle_test
 init_ino_test
+ns_active_ref_test
+listns_test
+listns_permissions_test
+listns_efault_test
+siocgskns_test
+cred_change_test
+stress_test
+listns_pagination_bug
+regression_pidfd_setns_test
diff --git a/tools/testing/selftests/namespaces/Makefile b/tools/testing/selftests/namespaces/Makefile
index 5fe4b3dc07d3..fbb821652c17 100644
--- a/tools/testing/selftests/namespaces/Makefile
+++ b/tools/testing/selftests/namespaces/Makefile
@@ -1,7 +1,29 @@
 # SPDX-License-Identifier: GPL-2.0-only
 CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+LDLIBS += -lcap
 
-TEST_GEN_PROGS := nsid_test file_handle_test init_ino_test
+TEST_GEN_PROGS := nsid_test \
+		  file_handle_test \
+		  init_ino_test \
+		  ns_active_ref_test \
+		  listns_test \
+		  listns_permissions_test \
+		  listns_efault_test \
+		  siocgskns_test \
+		  cred_change_test \
+		  stress_test \
+		  listns_pagination_bug \
+		  regression_pidfd_setns_test
 
 include ../lib.mk
 
+$(OUTPUT)/ns_active_ref_test: ../filesystems/utils.c
+$(OUTPUT)/listns_test: ../filesystems/utils.c
+$(OUTPUT)/listns_permissions_test: ../filesystems/utils.c
+$(OUTPUT)/listns_efault_test: ../filesystems/utils.c
+$(OUTPUT)/siocgskns_test: ../filesystems/utils.c
+$(OUTPUT)/cred_change_test: ../filesystems/utils.c
+$(OUTPUT)/stress_test: ../filesystems/utils.c
+$(OUTPUT)/listns_pagination_bug: ../filesystems/utils.c
+$(OUTPUT)/regression_pidfd_setns_test: ../filesystems/utils.c
+
diff --git a/tools/testing/selftests/namespaces/cred_change_test.c b/tools/testing/selftests/namespaces/cred_change_test.c
new file mode 100644
index 000000000000..7b4f5ad3f725
--- /dev/null
+++ b/tools/testing/selftests/namespaces/cred_change_test.c
@@ -0,0 +1,814 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/capability.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/nsfs.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Test credential changes and their impact on namespace active references.
+ */
+
+/*
+ * Test setuid() in a user namespace properly swaps active references.
+ * Create a user namespace with multiple UIDs mapped, then setuid() between them.
+ * Verify that the user namespace remains active throughout.
+ */
+TEST(setuid_preserves_active_refs)
+{
+	pid_t pid;
+	int status;
+	__u64 userns_id;
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[256];
+	ssize_t ret;
+	int i;
+	bool found = false;
+	int pipefd[2];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child: enter a new userns, report its ID, then churn UIDs */
+		int fd, userns_fd;
+		__u64 child_userns_id;
+		uid_t orig_uid = getuid();
+		int setuid_count;
+
+		close(pipefd[0]);
+
+		/* Create new user namespace with multiple UIDs mapped (0-9) */
+		userns_fd = get_userns_fd(0, orig_uid, 10);
+		if (userns_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+			close(userns_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(userns_fd);
+
+		/* Look up the ID of the user namespace we just joined */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+			close(fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(fd);
+
+		/* Send namespace ID to parent; parent SKIPs on a short read */
+		write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+		/*
+		 * Perform multiple setuid() calls.  Each one goes through
+		 * commit_creds(), which should swap active references via
+		 * switch_cred_namespaces().  EPERM failures are tolerated.
+		 */
+		for (setuid_count = 0; setuid_count < 50; setuid_count++) {
+			uid_t target_uid = (setuid_count % 10);
+			if (setuid(target_uid) < 0) {
+				if (errno != EPERM) {
+					close(pipefd[1]);
+					exit(1);
+				}
+			}
+		}
+
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	/* Parent process: drop the unused write end of the pipe */
+	close(pipefd[1]);
+
+	if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+		close(pipefd[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to get namespace ID from child");
+	}
+	close(pipefd[0]);
+
+	TH_LOG("Child user namespace ID: %llu", (unsigned long long)userns_id);
+
+	/* Verify namespace is listed before the child has been reaped */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret, 0);
+	}
+
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == userns_id) {
+			found = true;
+			break;
+		}
+	}
+	ASSERT_TRUE(found);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* With the child reaped, the namespace must no longer be listed */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	ASSERT_GE(ret, 0);
+
+	found = false;
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == userns_id) {
+			found = true;
+			break;
+		}
+	}
+
+	ASSERT_FALSE(found);
+	TH_LOG("setuid() correctly preserved active references (no leak)");
+}
+
+/*
+ * Test setgid() in a user namespace properly handles active references.
+ */
+TEST(setgid_preserves_active_refs)
+{
+	pid_t pid;
+	int status;
+	__u64 userns_id;
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[256];
+	ssize_t ret;
+	int i;
+	bool found = false;
+	int pipefd[2];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child: enter a new userns, report its ID, then churn GIDs */
+		int fd, userns_fd;
+		__u64 child_userns_id;
+		uid_t orig_uid = getuid();
+		int setgid_count;
+
+		close(pipefd[0]);
+
+		/* Create new user namespace with multiple GIDs mapped */
+		userns_fd = get_userns_fd(0, orig_uid, 10);
+		if (userns_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+			close(userns_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(userns_fd);
+
+		/* Look up the ID of the user namespace we just joined */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+			close(fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(fd);
+
+		write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+		/* Perform multiple setgid() calls; EPERM is tolerated */
+		for (setgid_count = 0; setgid_count < 50; setgid_count++) {
+			gid_t target_gid = (setgid_count % 10);
+			if (setgid(target_gid) < 0) {
+				if (errno != EPERM) {
+					close(pipefd[1]);
+					exit(1);
+				}
+			}
+		}
+
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	/* Parent process: drop the unused write end of the pipe */
+	close(pipefd[1]);
+
+	if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+		close(pipefd[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to get namespace ID from child");
+	}
+	close(pipefd[0]);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* With the child reaped, the namespace must no longer be listed */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret, 0);
+	}
+
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == userns_id) {
+			found = true;
+			break;
+		}
+	}
+
+	ASSERT_FALSE(found);
+	TH_LOG("setgid() correctly preserved active references (no leak)");
+}
+
+/*
+ * Test setresuid() which changes real, effective, and saved UIDs.
+ * This should properly swap active references via commit_creds().
+ */
+TEST(setresuid_preserves_active_refs)
+{
+	pid_t pid;
+	int status;
+	__u64 userns_id;
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[256];
+	ssize_t ret;
+	int i;
+	bool found = false;
+	int pipefd[2];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child: enter a new userns, report its ID, then churn UIDs */
+		int fd, userns_fd;
+		__u64 child_userns_id;
+		uid_t orig_uid = getuid();
+		int setres_count;
+
+		close(pipefd[0]);
+
+		/* Create new user namespace */
+		userns_fd = get_userns_fd(0, orig_uid, 10);
+		if (userns_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+			close(userns_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(userns_fd);
+
+		/* Look up the ID of the user namespace we just joined */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+			close(fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(fd);
+
+		write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+		/* Rotate real/effective/saved UIDs through 0-4; EPERM is tolerated */
+		for (setres_count = 0; setres_count < 30; setres_count++) {
+			uid_t uid1 = (setres_count % 5);
+			uid_t uid2 = ((setres_count + 1) % 5);
+			uid_t uid3 = ((setres_count + 2) % 5);
+
+			if (setresuid(uid1, uid2, uid3) < 0) {
+				if (errno != EPERM) {
+					close(pipefd[1]);
+					exit(1);
+				}
+			}
+		}
+
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	/* Parent process: drop the unused write end of the pipe */
+	close(pipefd[1]);
+
+	if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+		close(pipefd[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to get namespace ID from child");
+	}
+	close(pipefd[0]);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* With the child reaped, the namespace must no longer be listed */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret, 0);
+	}
+
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == userns_id) {
+			found = true;
+			break;
+		}
+	}
+
+	ASSERT_FALSE(found);
+	TH_LOG("setresuid() correctly preserved active references (no leak)");
+}
+
+/*
+ * Test credential changes across multiple user namespaces.
+ * Create nested user namespaces and verify active reference tracking.
+ */
+TEST(cred_change_nested_userns)
+{
+	pid_t pid;
+	int status;
+	__u64 parent_userns_id, child_userns_id;
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[256];
+	ssize_t ret;
+	int i;
+	bool found_parent = false, found_child = false;
+	int pipefd[2];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child: enter two user namespaces in turn, report both IDs */
+		int fd, userns_fd;
+		__u64 parent_id, child_id;
+		uid_t orig_uid = getuid();
+
+		close(pipefd[0]);
+
+		/* Create first user namespace */
+		userns_fd = get_userns_fd(0, orig_uid, 1);
+		if (userns_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+			close(userns_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(userns_fd);
+
+		/* Get first namespace ID (reported to the parent as parent_id) */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &parent_id) < 0) {
+			close(fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(fd);
+
+		/* Create nested user namespace */
+		userns_fd = get_userns_fd(0, 0, 1);
+		if (userns_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+			close(userns_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(userns_fd);
+
+		/* Get nested namespace ID (reported to the parent as child_id) */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &child_id) < 0) {
+			close(fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(fd);
+
+		/* Send both IDs to parent, first namespace's ID first */
+		write(pipefd[1], &parent_id, sizeof(parent_id));
+		write(pipefd[1], &child_id, sizeof(child_id));
+
+		/* Credential changes in nested namespace; results are ignored */
+		setuid(0);
+		setgid(0);
+
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	/* Parent process: drop the unused write end of the pipe */
+	close(pipefd[1]);
+
+	/* Read both namespace IDs in the order the child wrote them */
+	if (read(pipefd[0], &parent_userns_id, sizeof(parent_userns_id)) != sizeof(parent_userns_id)) {
+		close(pipefd[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to get parent namespace ID");
+	}
+
+	if (read(pipefd[0], &child_userns_id, sizeof(child_userns_id)) != sizeof(child_userns_id)) {
+		close(pipefd[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to get child namespace ID");
+	}
+	close(pipefd[0]);
+
+	TH_LOG("Parent userns: %llu, Child userns: %llu",
+	       (unsigned long long)parent_userns_id,
+	       (unsigned long long)child_userns_id);
+
+	/* Verify both namespaces are listed before the child is reaped */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret, 0);
+	}
+
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == parent_userns_id)
+			found_parent = true;
+		if (ns_ids[i] == child_userns_id)
+			found_child = true;
+	}
+
+	ASSERT_TRUE(found_parent);
+	ASSERT_TRUE(found_child);
+
+	/* Reap the child; it must have exited cleanly */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* With the child reaped, neither namespace may still be listed */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	ASSERT_GE(ret, 0);
+
+	found_parent = false;
+	found_child = false;
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == parent_userns_id)
+			found_parent = true;
+		if (ns_ids[i] == child_userns_id)
+			found_child = true;
+	}
+
+	ASSERT_FALSE(found_parent);
+	ASSERT_FALSE(found_child);
+	TH_LOG("Nested user namespace credential changes preserved active refs (no leak)");
+}
+
+/*
+ * Test rapid credential changes don't cause refcount imbalances.
+ * This stress-tests the switch_cred_namespaces() logic.
+ */
+TEST(rapid_cred_changes_no_leak)
+{
+	pid_t pid;
+	int status;
+	__u64 userns_id;
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[256];
+	ssize_t ret;
+	int i;
+	bool found = false;
+	int pipefd[2];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child: enter a new userns, report its ID, then churn creds */
+		int fd, userns_fd;
+		__u64 child_userns_id;
+		uid_t orig_uid = getuid();
+		int change_count;
+
+		close(pipefd[0]);
+
+		/* Create new user namespace with wider range of UIDs/GIDs */
+		userns_fd = get_userns_fd(0, orig_uid, 100);
+		if (userns_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+			close(userns_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(userns_fd);
+
+		/* Look up the ID of the user namespace we just joined */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+			close(fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(fd);
+
+		write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+		/*
+		 * Perform many rapid credential changes; results are not checked.
+		 * Mix setuid, setgid, setreuid, setregid, setresuid, setresgid.
+		 */
+		for (change_count = 0; change_count < 200; change_count++) {
+			switch (change_count % 6) {
+			case 0:
+				setuid(change_count % 50);
+				break;
+			case 1:
+				setgid(change_count % 50);
+				break;
+			case 2:
+				setreuid(change_count % 50, (change_count + 1) % 50);
+				break;
+			case 3:
+				setregid(change_count % 50, (change_count + 1) % 50);
+				break;
+			case 4:
+				setresuid(change_count % 50, (change_count + 1) % 50, (change_count + 2) % 50);
+				break;
+			case 5:
+				setresgid(change_count % 50, (change_count + 1) % 50, (change_count + 2) % 50);
+				break;
+			}
+		}
+
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	/* Parent process: drop the unused write end of the pipe */
+	close(pipefd[1]);
+
+	if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+		close(pipefd[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to get namespace ID from child");
+	}
+	close(pipefd[0]);
+
+	TH_LOG("Testing with user namespace ID: %llu", (unsigned long long)userns_id);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* Child is reaped: a still-listed namespace means a leaked active ref */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret, 0);
+	}
+
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == userns_id) {
+			found = true;
+			break;
+		}
+	}
+
+	ASSERT_FALSE(found);
+	TH_LOG("200 rapid credential changes completed with no active ref leak");
+}
+
+/*
+ * Test setfsuid/setfsgid which change filesystem UID/GID.
+ * These also trigger credential changes but may have different code paths.
+ */
+TEST(setfsuid_preserves_active_refs)
+{
+	pid_t pid;
+	int status;
+	__u64 userns_id;
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[256];
+	ssize_t ret;
+	int i;
+	bool found = false;
+	int pipefd[2];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child: enter a new userns, report its ID, then churn fs IDs */
+		int fd, userns_fd;
+		__u64 child_userns_id;
+		uid_t orig_uid = getuid();
+		int change_count;
+
+		close(pipefd[0]);
+
+		/* Create new user namespace */
+		userns_fd = get_userns_fd(0, orig_uid, 10);
+		if (userns_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+			close(userns_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(userns_fd);
+
+		/* Look up the ID of the user namespace we just joined */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+			close(fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(fd);
+
+		write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
+
+		/* Perform multiple setfsuid/setfsgid calls; results are ignored */
+		for (change_count = 0; change_count < 50; change_count++) {
+			setfsuid(change_count % 10);
+			setfsgid(change_count % 10);
+		}
+
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	/* Parent process: drop the unused write end of the pipe */
+	close(pipefd[1]);
+
+	if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
+		close(pipefd[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to get namespace ID from child");
+	}
+	close(pipefd[0]);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* With the child reaped, the namespace must no longer be listed */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret, 0);
+	}
+
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == userns_id) {
+			found = true;
+			break;
+		}
+	}
+
+	ASSERT_FALSE(found);
+	TH_LOG("setfsuid/setfsgid correctly preserved active references (no leak)");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_efault_test.c b/tools/testing/selftests/namespaces/listns_efault_test.c
new file mode 100644
index 000000000000..c7ed4023d7a8
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_efault_test.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "../pidfd/pidfd.h"
+#include "wrappers.h"
+
+/*
+ * Test listns() error handling with invalid buffer addresses.
+ *
+ * When the buffer pointer is invalid (e.g., crossing page boundaries
+ * into unmapped memory), listns() is expected to fail with EFAULT.
+ *
+ * This test also creates mount namespaces that get destroyed during
+ * iteration, testing that namespace cleanup happens outside the RCU
+ * read lock. It is skipped when the kernel lacks listns().
+ */
+TEST(listns_partial_fault_with_ns_cleanup)
+{
+	void *map;
+	__u64 *ns_ids;
+	ssize_t ret;
+	long page_size;
+	pid_t pids[5], iter_pid;
+	int pidfds[5];
+	int sv[5][2];
+	int iter_pidfd;
+	int i, status;
+	char c;
+
+	page_size = sysconf(_SC_PAGESIZE);
+	ASSERT_GT(page_size, 0);
+
+	/*
+	 * Map two pages, then unmap the second one so that any write
+	 * past the end of the first page faults.
+	 */
+	map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(map, MAP_FAILED);
+
+	ret = munmap((char *)map + page_size, page_size);
+	ASSERT_EQ(ret, 0);
+
+	/*
+	 * Position the buffer pointer so there's room for exactly one u64
+	 * before the page boundary. The second u64 would fall into the
+	 * unmapped page.
+	 */
+	ns_ids = ((__u64 *)((char *)map + page_size)) - 1;
+
+	/*
+	 * Create a separate process to run listns() in a loop concurrently
+	 * with namespace creation and destruction.
+	 */
+	iter_pid = create_child(&iter_pidfd, 0);
+	ASSERT_NE(iter_pid, -1);
+
+	if (iter_pid == 0) {
+		struct ns_id_req req = {
+			.size = sizeof(req),
+			.spare = 0,
+			.ns_id = 0,
+			.ns_type = 0,  /* All types */
+			.spare2 = 0,
+			.user_ns_id = 0,  /* Global listing */
+		};
+		int iter_ret;
+
+		/*
+		 * Loop calling listns() until killed.
+		 * The kernel should:
+		 * 1. Successfully write the first namespace ID (within valid page)
+		 * 2. Fail with EFAULT when trying to write the second ID (unmapped page)
+		 * 3. Handle concurrent namespace destruction without deadlock
+		 */
+		while (1) {
+			iter_ret = sys_listns(&req, ns_ids, 2, 0);
+
+			if (iter_ret == -1 && errno == ENOSYS)
+				_exit(PIDFD_SKIP);
+		}
+	}
+
+	/* Small delay to let iterator start looping */
+	usleep(50000);
+
+	/*
+	 * Create several child processes, each in its own mount namespace.
+	 * These will be destroyed while the iterator is running listns().
+	 */
+	for (i = 0; i < 5; i++) {
+		/* Create socketpair for synchronization */
+		ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
+
+		pids[i] = create_child(&pidfds[i], CLONE_NEWNS);
+		ASSERT_NE(pids[i], -1);
+
+		if (pids[i] == 0) {
+			close(sv[i][0]); /* Close parent end */
+
+			if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL))
+				_exit(1);
+
+			/* Child: create a couple of tmpfs mounts */
+			if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
+				_exit(1);
+			if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
+				_exit(1);
+
+			if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
+				_exit(1);
+			if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
+				_exit(1);
+
+			/* Signal parent that setup is complete */
+			if (write_nointr(sv[i][1], "R", 1) != 1)
+				_exit(1);
+
+			/* Wait for parent to signal us to exit */
+			if (read_nointr(sv[i][1], &c, 1) != 1)
+				_exit(1);
+
+			close(sv[i][1]);
+			_exit(0);
+		}
+
+		close(sv[i][1]); /* Close child end */
+	}
+
+	/* Wait for all children to finish setup */
+	for (i = 0; i < 5; i++) {
+		ret = read_nointr(sv[i][0], &c, 1);
+		ASSERT_EQ(ret, 1);
+		ASSERT_EQ(c, 'R');
+	}
+
+	/*
+	 * Signal children to exit. This will destroy their mount namespaces
+	 * while listns() is iterating the namespace tree.
+	 * This tests that cleanup happens outside the RCU read lock.
+	 */
+	for (i = 0; i < 5; i++)
+		write_nointr(sv[i][0], "X", 1);
+
+	/* Reap exactly the mount namespace children, never the iterator */
+	for (i = 0; i < 5; i++) {
+		waitpid(pids[i], NULL, 0);
+		close(sv[i][0]);
+		close(pidfds[i]);
+	}
+
+	/* Kill iterator and wait for it */
+	sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
+	ret = waitpid(iter_pid, &status, 0);
+	ASSERT_EQ(ret, iter_pid);
+	close(iter_pidfd);
+	munmap(map, page_size);
+
+	/* The iterator exits with PIDFD_SKIP when listns() is missing */
+	if (WIFEXITED(status) && WEXITSTATUS(status) == PIDFD_SKIP)
+		SKIP(return, "listns() not supported");
+
+	/* Otherwise it should have been killed by us */
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(WTERMSIG(status), SIGKILL);
+}
+
+/*
+ * Test listns() error handling when the entire buffer is invalid:
+ * a sanity check that basic invalid pointer detection works.
+ */
+TEST(listns_complete_fault)
+{
+	/* Everything but .size stays zero: no filter, no pagination cursor */
+	struct ns_id_req req = {
+		.size = sizeof(req),
+	};
+
+	/* Use a clearly invalid pointer as the output buffer */
+	__u64 *ns_ids = (__u64 *)0xdeadbeef;
+	ssize_t ret;
+
+	ret = sys_listns(&req, ns_ids, 10, 0);
+	if (ret == -1) {
+		/* Kernels without the syscall cannot run this test */
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+	}
+
+	/*
+	 * Should fail with EFAULT: the call returns -1 and errno
+	 * reports the bad buffer address.
+	 */
+	ASSERT_EQ(ret, -1);
+	ASSERT_EQ(errno, EFAULT);
+}
+
+/*
+ * Test listns() error handling when the buffer is NULL: a NULL buffer
+ * together with a non-zero count must be rejected with EFAULT.
+ */
+TEST(listns_null_buffer)
+{
+	ssize_t ret;
+	struct ns_id_req req = {
+		.size = sizeof(req),
+	};
+
+	/* Ask for up to 10 IDs but give the kernel nowhere to put them */
+	ret = sys_listns(&req, NULL, 10, 0);
+	if (ret == -1) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+	}
+
+	/*
+	 * Should fail with EFAULT instead of returning a (possibly
+	 * empty) listing.
+	 */
+	ASSERT_EQ(ret, -1);
+	ASSERT_EQ(errno, EFAULT);
+}
+
+/*
+ * Test listns() with a buffer that becomes invalid mid-iteration
+ * (after several successful writes), combined with mount namespace
+ * destruction to test RCU cleanup logic. Skipped without listns().
+ */
+TEST(listns_late_fault_with_ns_cleanup)
+{
+	void *map;
+	__u64 *ns_ids;
+	ssize_t ret;
+	long page_size;
+	pid_t pids[10], iter_pid;
+	int pidfds[10];
+	int sv[10][2];
+	int iter_pidfd;
+	int i, status;
+	char c;
+
+	page_size = sysconf(_SC_PAGESIZE);
+	ASSERT_GT(page_size, 0);
+
+	/* Map two pages */
+	map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(map, MAP_FAILED);
+
+	/* Unmap the second page */
+	ret = munmap((char *)map + page_size, page_size);
+	ASSERT_EQ(ret, 0);
+
+	/*
+	 * Position buffer so we can write several u64s successfully
+	 * before hitting the page boundary.
+	 */
+	ns_ids = ((__u64 *)((char *)map + page_size)) - 5;
+
+	/* Create a separate process to run listns() concurrently. */
+	iter_pid = create_child(&iter_pidfd, 0);
+	ASSERT_NE(iter_pid, -1);
+
+	if (iter_pid == 0) {
+		struct ns_id_req req = {
+			.size = sizeof(req),
+			.spare = 0,
+			.ns_id = 0,
+			.ns_type = 0,
+			.spare2 = 0,
+			.user_ns_id = 0,
+		};
+		int iter_ret;
+
+		/*
+		 * Loop calling listns() until killed.
+		 * Request 10 namespace IDs while namespaces are being destroyed.
+		 * This tests:
+		 * 1. EFAULT handling when buffer becomes invalid
+		 * 2. Namespace cleanup outside RCU read lock during iteration
+		 */
+		while (1) {
+			iter_ret = sys_listns(&req, ns_ids, 10, 0);
+
+			if (iter_ret == -1 && errno == ENOSYS)
+				_exit(PIDFD_SKIP);
+		}
+	}
+
+	/* Small delay to let iterator start looping */
+	usleep(50000);
+
+	/*
+	 * Create more children with mount namespaces to increase the
+	 * likelihood that namespace cleanup happens during iteration.
+	 */
+	for (i = 0; i < 10; i++) {
+		/* Create socketpair for synchronization */
+		ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
+
+		pids[i] = create_child(&pidfds[i], CLONE_NEWNS);
+		ASSERT_NE(pids[i], -1);
+
+		if (pids[i] == 0) {
+			close(sv[i][0]); /* Close parent end */
+
+			if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL))
+				_exit(1);
+
+			/* Child: create tmpfs mounts */
+			if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
+				_exit(1);
+			if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
+				_exit(1);
+
+			if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
+				_exit(1);
+			if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
+				_exit(1);
+
+			/* Signal parent that setup is complete */
+			if (write_nointr(sv[i][1], "R", 1) != 1)
+				_exit(1);
+
+			/* Wait for parent to signal us to exit */
+			if (read_nointr(sv[i][1], &c, 1) != 1)
+				_exit(1);
+
+			close(sv[i][1]);
+			_exit(0);
+		}
+
+		close(sv[i][1]); /* Close child end */
+	}
+
+	/* Wait for all children to finish setup */
+	for (i = 0; i < 10; i++) {
+		ret = read_nointr(sv[i][0], &c, 1);
+		ASSERT_EQ(ret, 1);
+		ASSERT_EQ(c, 'R');
+	}
+
+	/* Tell the first half of the children to exit */
+	for (i = 0; i < 5; i++)
+		write_nointr(sv[i][0], "X", 1);
+
+	/* Small delay to let some exit */
+	usleep(10000);
+
+	/* Tell the remaining children to exit */
+	for (i = 5; i < 10; i++)
+		write_nointr(sv[i][0], "X", 1);
+
+	/* Reap exactly the mount namespace children, never the iterator */
+	for (i = 0; i < 10; i++) {
+		waitpid(pids[i], NULL, 0);
+		close(sv[i][0]);
+		close(pidfds[i]);
+	}
+
+	/* Kill iterator and wait for it */
+	sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
+	ret = waitpid(iter_pid, &status, 0);
+	ASSERT_EQ(ret, iter_pid);
+	close(iter_pidfd);
+	munmap(map, page_size);
+
+	/* The iterator exits with PIDFD_SKIP when listns() is missing */
+	if (WIFEXITED(status) && WEXITSTATUS(status) == PIDFD_SKIP)
+		SKIP(return, "listns() not supported");
+
+	/* Otherwise it should have been killed by us */
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(WTERMSIG(status), SIGKILL);
+}
+
+/*
+ * Test specifically focused on mount namespace cleanup during EFAULT.
+ * Filter for mount namespaces only. Skipped without listns().
+ */
+TEST(listns_mnt_ns_cleanup_on_fault)
+{
+	void *map;
+	__u64 *ns_ids;
+	ssize_t ret;
+	long page_size;
+	pid_t pids[8], iter_pid;
+	int pidfds[8];
+	int sv[8][2];
+	int iter_pidfd;
+	int i, status;
+	char c;
+
+	page_size = sysconf(_SC_PAGESIZE);
+	ASSERT_GT(page_size, 0);
+
+	/* Set up partial fault buffer */
+	map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(map, MAP_FAILED);
+
+	ret = munmap((char *)map + page_size, page_size);
+	ASSERT_EQ(ret, 0);
+
+	/* Position for 3 successful writes, then fault */
+	ns_ids = ((__u64 *)((char *)map + page_size)) - 3;
+
+	/*
+	 * Create a separate process to run listns() concurrently.
+	 */
+	iter_pid = create_child(&iter_pidfd, 0);
+	ASSERT_NE(iter_pid, -1);
+
+	if (iter_pid == 0) {
+		struct ns_id_req req = {
+			.size = sizeof(req),
+			.spare = 0,
+			.ns_id = 0,
+			.ns_type = CLONE_NEWNS,  /* Only mount namespaces */
+			.spare2 = 0,
+			.user_ns_id = 0,
+		};
+		int iter_ret;
+
+		/* Loop calling listns() until killed to race with namespace destruction */
+		while (1) {
+			iter_ret = sys_listns(&req, ns_ids, 10, 0);
+
+			if (iter_ret == -1 && errno == ENOSYS)
+				_exit(PIDFD_SKIP);
+		}
+	}
+
+	/* Small delay to let iterator start looping */
+	usleep(50000);
+
+	/* Create children with mount namespaces */
+	for (i = 0; i < 8; i++) {
+		/* Create socketpair for synchronization */
+		ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
+
+		pids[i] = create_child(&pidfds[i], CLONE_NEWNS);
+		ASSERT_NE(pids[i], -1);
+
+		if (pids[i] == 0) {
+			close(sv[i][0]); /* Close parent end */
+
+			if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL))
+				_exit(1);
+
+			/* Do some mount operations to make cleanup more interesting */
+			if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
+				_exit(1);
+			if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
+				_exit(1);
+
+			if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
+				_exit(1);
+			if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
+				_exit(1);
+
+			/* Signal parent that setup is complete */
+			if (write_nointr(sv[i][1], "R", 1) != 1)
+				_exit(1);
+
+			/* Wait for parent to signal us to exit */
+			if (read_nointr(sv[i][1], &c, 1) != 1)
+				_exit(1);
+
+			close(sv[i][1]);
+			_exit(0);
+		}
+
+		close(sv[i][1]); /* Close child end */
+	}
+
+	/* Wait for all children to finish setup */
+	for (i = 0; i < 8; i++) {
+		ret = read_nointr(sv[i][0], &c, 1);
+		ASSERT_EQ(ret, 1);
+		ASSERT_EQ(c, 'R');
+	}
+
+	/* Tell children to exit to trigger namespace destruction during iteration */
+	for (i = 0; i < 8; i++)
+		write_nointr(sv[i][0], "X", 1);
+
+	/* Reap exactly the mount namespace children, never the iterator */
+	for (i = 0; i < 8; i++) {
+		waitpid(pids[i], NULL, 0);
+		close(sv[i][0]);
+		close(pidfds[i]);
+	}
+
+	/* Kill iterator and wait for it */
+	sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
+	ret = waitpid(iter_pid, &status, 0);
+	ASSERT_EQ(ret, iter_pid);
+	close(iter_pidfd);
+	munmap(map, page_size);
+
+	/* The iterator exits with PIDFD_SKIP when listns() is missing */
+	if (WIFEXITED(status) && WEXITSTATUS(status) == PIDFD_SKIP)
+		SKIP(return, "listns() not supported");
+
+	/* Otherwise it should have been killed by us */
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(WTERMSIG(status), SIGKILL);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_pagination_bug.c b/tools/testing/selftests/namespaces/listns_pagination_bug.c
new file mode 100644
index 000000000000..da7d33f96397
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_pagination_bug.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Minimal test case to reproduce KASAN out-of-bounds in listns pagination.
+ *
+ * The bug occurs when:
+ * 1. Filtering by a specific namespace type (e.g., CLONE_NEWUSER)
+ * 2. Using pagination (req.ns_id != 0)
+ * 3. The lookup_ns_id_at() call in do_listns() passes ns_type=0 instead of
+ *    the filtered type, causing it to search the unified tree and potentially
+ *    return a namespace of the wrong type.
+ */
+TEST(pagination_with_type_filter)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,  /* Filter by user namespace */
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	pid_t pids[10];
+	int num_children = 10;
+	int i;
+	int sv[2];
+	__u64 first_batch[3];
+	ssize_t ret;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+	/* Create children with user namespaces */
+	for (i = 0; i < num_children; i++) {
+		pids[i] = fork();
+		ASSERT_GE(pids[i], 0);
+
+		if (pids[i] == 0) {
+			/* Initialized so that a defined "ready" byte is sent */
+			char c = 'R';
+
+			close(sv[0]);
+
+			if (setup_userns() < 0) {
+				close(sv[1]);
+				exit(1);
+			}
+
+			/* Signal parent we're ready */
+			if (write(sv[1], &c, 1) != 1) {
+				close(sv[1]);
+				exit(1);
+			}
+
+			/* Wait for parent signal to exit */
+			if (read(sv[1], &c, 1) != 1) {
+				close(sv[1]);
+				exit(1);
+			}
+
+			close(sv[1]);
+			exit(0);
+		}
+	}
+
+	close(sv[1]);
+
+	/* Wait for all children to signal ready */
+	for (i = 0; i < num_children; i++) {
+		char c;
+		if (read(sv[0], &c, 1) != 1) {
+			close(sv[0]);
+			for (int j = 0; j < num_children; j++)
+				kill(pids[j], SIGKILL);
+			for (int j = 0; j < num_children; j++)
+				waitpid(pids[j], NULL, 0);
+			ASSERT_TRUE(false);
+		}
+	}
+
+	/* First batch - this should work */
+	ret = sys_listns(&req, first_batch, 3, 0);
+	if (ret < 0) {
+		if (errno == ENOSYS) {
+			close(sv[0]);
+			for (i = 0; i < num_children; i++)
+				kill(pids[i], SIGKILL);
+			for (i = 0; i < num_children; i++)
+				waitpid(pids[i], NULL, 0);
+			SKIP(return, "listns() not supported");
+		}
+		ASSERT_GE(ret, 0);
+	}
+
+	TH_LOG("First batch returned %zd entries", ret);
+
+	if (ret == 3) {
+		__u64 second_batch[3];
+
+		/* Second batch - pagination triggers the bug */
+		req.ns_id = first_batch[2];  /* Continue from last ID */
+		ret = sys_listns(&req, second_batch, 3, 0);
+
+		TH_LOG("Second batch returned %zd entries", ret);
+		ASSERT_GE(ret, 0);
+	}
+
+	/* Signal all children to exit */
+	for (i = 0; i < num_children; i++) {
+		char c = 'X';
+		if (write(sv[0], &c, 1) != 1) {
+			close(sv[0]);
+			for (int j = i; j < num_children; j++)
+				kill(pids[j], SIGKILL);
+			for (int j = 0; j < num_children; j++)
+				waitpid(pids[j], NULL, 0);
+			ASSERT_TRUE(false);
+		}
+	}
+
+	close(sv[0]);
+
+	/* Cleanup: reap every child; their exit statuses are not checked */
+	for (i = 0; i < num_children; i++)
+		waitpid(pids[i], NULL, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_permissions_test.c b/tools/testing/selftests/namespaces/listns_permissions_test.c
new file mode 100644
index 000000000000..82d818751a5f
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_permissions_test.c
@@ -0,0 +1,759 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/capability.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Test that unprivileged users can only see namespaces they're currently in.
+ * A child enters a fresh user namespace and network namespace, lists the
+ * network namespaces visible to it and reports the outcome to the parent.
+ */
+TEST(listns_unprivileged_current_only)
+{
+	/* Restrict the listing to network namespaces */
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWNET,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[100];
+	ssize_t ret;
+	int pipefd[2];
+	pid_t pid;
+	int status;
+	bool found_ours = false;
+	int unexpected_count = 0;
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		__u64 our_netns_id;
+		ssize_t idx;
+		int fd;
+
+		close(pipefd[0]);
+
+		/*
+		 * Create a user namespace to be unprivileged, then a
+		 * network namespace of our own.
+		 */
+		if (setup_userns() < 0)
+			goto child_fail;
+
+		if (unshare(CLONE_NEWNET) < 0)
+			goto child_fail;
+
+		/* Get our network namespace ID */
+		fd = open("/proc/self/ns/net", O_RDONLY);
+		if (fd < 0)
+			goto child_fail;
+
+		if (ioctl(fd, NS_GET_ID, &our_netns_id) < 0) {
+			close(fd);
+			goto child_fail;
+		}
+		close(fd);
+
+		/* Now we're unprivileged - list all network namespaces */
+		ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+		if (ret < 0)
+			goto child_fail;
+
+		/*
+		 * We should only see our own network namespace. Any other
+		 * entry is either init_net (which we can see) or unexpected,
+		 * so it is merely counted.
+		 */
+		for (idx = 0; idx < ret; idx++) {
+			if (ns_ids[idx] != our_netns_id)
+				unexpected_count++;
+			else
+				found_ours = true;
+		}
+
+		/* Send results to parent */
+		write(pipefd[1], &found_ours, sizeof(found_ours));
+		write(pipefd[1], &unexpected_count, sizeof(unexpected_count));
+		close(pipefd[1]);
+		exit(0);
+
+child_fail:
+		/* Shared failure exit: the parent sees a non-zero status */
+		close(pipefd[1]);
+		exit(1);
+	}
+
+	/*
+	 * Parent: pick up the child's report. If the child bailed out
+	 * before writing, both values keep their initial settings.
+	 */
+	close(pipefd[1]);
+	read(pipefd[0], &found_ours, sizeof(found_ours));
+	read(pipefd[0], &unexpected_count, sizeof(unexpected_count));
+	close(pipefd[0]);
+
+	/* The child must have exited normally with status 0 */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* Child should have seen its own namespace */
+	ASSERT_TRUE(found_ours);
+
+	/* The number of other namespaces is only logged, not asserted on */
+	TH_LOG("Unprivileged child saw its own namespace, plus %d others (likely init_net)",
+			unexpected_count);
+}
+
+/*
+ * Test that users with CAP_SYS_ADMIN in a user namespace can see
+ * all namespaces owned by that user namespace.
+ */
+TEST(listns_cap_sys_admin_in_userns)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = 0,  /* All types */
+		.spare2 = 0,
+		.user_ns_id = 0,  /* Will be set to our created user namespace */
+	};
+	__u64 ns_ids[100];
+	int pipefd[2];
+	pid_t pid;
+	int status;
+	bool success = false;
+	ssize_t count = 0;
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* net, uts, ipc, and the user namespace itself */
+		const int min_expected = 4;
+		__u64 userns_id;
+		int fd;
+
+		close(pipefd[0]);
+
+		/* Create user namespace - we'll have CAP_SYS_ADMIN in it */
+		if (setup_userns() < 0)
+			goto child_fail;
+
+		/* Get the user namespace ID */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0)
+			goto child_fail;
+
+		if (ioctl(fd, NS_GET_ID, &userns_id) < 0) {
+			close(fd);
+			goto child_fail;
+		}
+		close(fd);
+
+		/*
+		 * Create several namespaces owned by this user namespace.
+		 * The unshare() results are not checked individually.
+		 */
+		unshare(CLONE_NEWNET);
+		unshare(CLONE_NEWUTS);
+		unshare(CLONE_NEWIPC);
+
+		/* List namespaces owned by our user namespace */
+		req.user_ns_id = userns_id;
+		count = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+		if (count < 0)
+			goto child_fail;
+
+		/*
+		 * We have CAP_SYS_ADMIN in this user namespace,
+		 * so we should see all namespaces owned by it.
+		 */
+		success = (count >= min_expected);
+
+		write(pipefd[1], &success, sizeof(success));
+		write(pipefd[1], &count, sizeof(count));
+		close(pipefd[1]);
+		exit(0);
+
+child_fail:
+		/* Shared failure exit: the parent sees a non-zero status */
+		close(pipefd[1]);
+		exit(1);
+	}
+
+	/*
+	 * Parent: pick up the verdict and the raw count. If the child
+	 * bailed out before writing, both keep their initial values.
+	 */
+	close(pipefd[1]);
+	read(pipefd[0], &success, sizeof(success));
+	read(pipefd[0], &count, sizeof(count));
+	close(pipefd[0]);
+
+	/* The child must have exited normally with status 0 */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* At least the four expected namespaces were listed */
+	ASSERT_TRUE(success);
+	TH_LOG("User with CAP_SYS_ADMIN saw %zd namespaces owned by their user namespace",
+			count);
+}
+
+/*
+ * Test that users cannot see namespaces from unrelated user namespaces.
+ * Create two sibling user namespaces (the first one is kept alive while
+ * the second one lists), verify they can't see each other's namespaces.
+ */
+TEST(listns_cannot_see_sibling_userns_namespaces)
+{
+	int pipefd[2], keepalive[2];
+	pid_t pid1, pid2;
+	int status;
+	__u64 netns_a_id;
+	ssize_t nread;
+	int pipefd2[2];
+	bool found_sibling_netns;
+
+	ASSERT_EQ(pipe(pipefd), 0);
+	ASSERT_EQ(pipe(keepalive), 0);
+
+	/* Fork first child - creates user namespace A */
+	pid1 = fork();
+	ASSERT_GE(pid1, 0);
+
+	if (pid1 == 0) {
+		int fd;
+		char buf;
+
+		close(pipefd[0]);
+		close(keepalive[1]);
+
+		/* Create user namespace A */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		/* Create network namespace owned by user namespace A */
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		/* Get network namespace ID */
+		fd = open("/proc/self/ns/net", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &netns_a_id) < 0) {
+			close(fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(fd);
+
+		/* Send namespace ID to parent */
+		write(pipefd[1], &netns_a_id, sizeof(netns_a_id));
+		close(pipefd[1]);
+
+		/* Keep alive for sibling to check: wait for EOF on keep-alive pipe */
+		read(keepalive[0], &buf, 1);
+		exit(0);
+	}
+
+	/* Parent reads namespace A ID; a short read means child A failed */
+	close(pipefd[1]);
+	close(keepalive[0]);
+	nread = read(pipefd[0], &netns_a_id, sizeof(netns_a_id));
+	close(pipefd[0]);
+	ASSERT_EQ(nread, (ssize_t)sizeof(netns_a_id));
+
+	TH_LOG("User namespace A created network namespace with ID %llu",
+	       (unsigned long long)netns_a_id);
+
+	/* Fork second child - creates user namespace B */
+	ASSERT_EQ(pipe(pipefd2), 0);
+
+	pid2 = fork();
+	ASSERT_GE(pid2, 0);
+
+	if (pid2 == 0) {
+		struct ns_id_req req = {
+			.size = sizeof(req),
+			.spare = 0,
+			.ns_id = 0,
+			.ns_type = CLONE_NEWNET,
+			.spare2 = 0,
+			.user_ns_id = 0,
+		};
+		__u64 ns_ids[100];
+		ssize_t ret;
+
+		close(keepalive[1]); /* only the parent may hold child A alive */
+		close(pipefd2[0]);
+
+		/* Create user namespace B (sibling to A) */
+		if (setup_userns() < 0) {
+			close(pipefd2[1]);
+			exit(1);
+		}
+
+		/* Try to list all network namespaces */
+		ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+		found_sibling_netns = false;
+		for (ssize_t i = 0; i < ret; i++) {
+			if (ns_ids[i] == netns_a_id) {
+				found_sibling_netns = true;
+				break;
+			}
+		}
+
+		/* We should NOT see the sibling's network namespace */
+		write(pipefd2[1], &found_sibling_netns, sizeof(found_sibling_netns));
+		close(pipefd2[1]);
+		exit(0);
+	}
+
+	/* Parent reads result from second child */
+	close(pipefd2[1]);
+	found_sibling_netns = false;
+	read(pipefd2[0], &found_sibling_netns, sizeof(found_sibling_netns));
+	close(pipefd2[0]);
+
+	/* Signal first child to exit */
+	close(keepalive[1]);
+
+	/* Wait for both children */
+	waitpid(pid2, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	waitpid(pid1, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+
+	/* Second child should NOT have seen first child's namespace */
+	ASSERT_FALSE(found_sibling_netns);
+	TH_LOG("User namespace B correctly could not see sibling namespace A's network namespace");
+}
+
+/*
+ * Test permission checking with LISTNS_CURRENT_USER.
+ * Verify that listing with LISTNS_CURRENT_USER respects permissions.
+ */
+TEST(listns_current_user_permissions)
+{
+	int pipefd[2];
+	pid_t pid;
+	int status;
+	bool success = false;
+	ssize_t count = 0;
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		struct ns_id_req req = {
+			.size = sizeof(req),
+			.spare = 0,
+			.ns_id = 0,
+			.ns_type = 0,
+			.spare2 = 0,
+			.user_ns_id = LISTNS_CURRENT_USER,
+		};
+		__u64 ns_ids[100];
+
+		close(pipefd[0]);
+
+		/* Create user namespace */
+		if (setup_userns() < 0)
+			goto child_fail;
+
+		/* Create some namespaces owned by this user namespace */
+		if (unshare(CLONE_NEWNET) < 0)
+			goto child_fail;
+
+		if (unshare(CLONE_NEWUTS) < 0)
+			goto child_fail;
+
+		/*
+		 * List with LISTNS_CURRENT_USER - should see our owned
+		 * namespaces: at least user, net, uts.
+		 */
+		count = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+		success = (count >= 3);
+
+		write(pipefd[1], &success, sizeof(success));
+		write(pipefd[1], &count, sizeof(count));
+		close(pipefd[1]);
+		exit(0);
+
+child_fail:
+		/* Shared failure exit: the parent sees a non-zero status */
+		close(pipefd[1]);
+		exit(1);
+	}
+
+	/*
+	 * Parent: pick up the verdict and the raw count. If the child
+	 * bailed out before writing, both keep their initial values.
+	 */
+	close(pipefd[1]);
+	read(pipefd[0], &success, sizeof(success));
+	read(pipefd[0], &count, sizeof(count));
+	close(pipefd[0]);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	ASSERT_TRUE(success);
+	TH_LOG("LISTNS_CURRENT_USER returned %zd namespaces", count);
+}
+
+/*
+ * Test that CAP_SYS_ADMIN in parent user namespace allows seeing
+ * child user namespace's owned namespaces: after creating two user
+ * namespaces in sequence, list by the first one's ID and look for
+ * the second one's ID in the result.
+ */
+TEST(listns_parent_userns_cap_sys_admin)
+{
+	int pipefd[2];
+	pid_t pid;
+	int status;
+	bool found_child_userns = false;
+	ssize_t count = 0;
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		__u64 parent_userns_id;
+		__u64 child_userns_id;
+		struct ns_id_req req;
+		__u64 ns_ids[100];
+		ssize_t idx;
+		int fd;
+
+		close(pipefd[0]);
+
+		/* Create parent user namespace - we have CAP_SYS_ADMIN in it */
+		if (setup_userns() < 0)
+			goto child_fail;
+
+		/* Get parent user namespace ID */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0)
+			goto child_fail;
+
+		if (ioctl(fd, NS_GET_ID, &parent_userns_id) < 0) {
+			close(fd);
+			goto child_fail;
+		}
+		close(fd);
+
+		/* Create child user namespace */
+		if (setup_userns() < 0)
+			goto child_fail;
+
+		/*
+		 * Get child user namespace ID; /proc/self/ns/user is
+		 * re-opened after the second setup_userns() call.
+		 */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0)
+			goto child_fail;
+
+		if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
+			close(fd);
+			goto child_fail;
+		}
+		close(fd);
+
+		/* Create namespaces owned by child user namespace */
+		if (unshare(CLONE_NEWNET) < 0)
+			goto child_fail;
+
+		/* List namespaces owned by parent user namespace */
+		req = (struct ns_id_req){
+			.size = sizeof(req),
+			.spare = 0,
+			.ns_id = 0,
+			.ns_type = 0,
+			.spare2 = 0,
+			.user_ns_id = parent_userns_id,
+		};
+		count = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+		/*
+		 * Should see child user namespace in the list. A failed
+		 * or empty listing simply leaves the flag false.
+		 */
+		for (idx = 0; idx < count; idx++) {
+			if (ns_ids[idx] == child_userns_id) {
+				found_child_userns = true;
+				break;
+			}
+		}
+
+		write(pipefd[1], &found_child_userns, sizeof(found_child_userns));
+		write(pipefd[1], &count, sizeof(count));
+		close(pipefd[1]);
+		exit(0);
+
+child_fail:
+		/* Shared failure exit: the parent sees a non-zero status */
+		close(pipefd[1]);
+		exit(1);
+	}
+
+	/*
+	 * Parent: pick up the flag and the raw count. If the child
+	 * bailed out before writing, both keep their initial values.
+	 */
+	close(pipefd[1]);
+	read(pipefd[0], &found_child_userns, sizeof(found_child_userns));
+	read(pipefd[0], &count, sizeof(count));
+	close(pipefd[0]);
+
+	/* The child must have exited normally with status 0 */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* The listing for the parent user namespace included the child one */
+	ASSERT_TRUE(found_child_userns);
+	TH_LOG("Process with CAP_SYS_ADMIN in parent user namespace saw child user namespace (total: %zd)",
+			count);
+}
+
+/*
+ * Test that we can see user namespaces we have CAP_SYS_ADMIN inside of.
+ * This is different from seeing namespaces owned by a user namespace.
+ */
+TEST(listns_cap_sys_admin_inside_userns)
+{
+	int pipefd[2];
+	pid_t pid;
+	int status;
+	bool found_ours = false;
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		__u64 our_userns_id;
+		struct ns_id_req req;
+		__u64 ns_ids[100];
+		ssize_t ret, idx;
+		int fd;
+
+		close(pipefd[0]);
+
+		/* Create user namespace - we have CAP_SYS_ADMIN inside it */
+		if (setup_userns() < 0)
+			goto child_fail;
+
+		/* Get our user namespace ID */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0)
+			goto child_fail;
+
+		if (ioctl(fd, NS_GET_ID, &our_userns_id) < 0) {
+			close(fd);
+			goto child_fail;
+		}
+		close(fd);
+
+		/* List all user namespaces globally */
+		req = (struct ns_id_req){
+			.size = sizeof(req),
+			.spare = 0,
+			.ns_id = 0,
+			.ns_type = CLONE_NEWUSER,
+			.spare2 = 0,
+			.user_ns_id = 0,
+		};
+		ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+		/* We should be able to see our own user namespace */
+		for (idx = 0; idx < ret; idx++) {
+			if (ns_ids[idx] == our_userns_id) {
+				found_ours = true;
+				break;
+			}
+		}
+
+		write(pipefd[1], &found_ours, sizeof(found_ours));
+		close(pipefd[1]);
+		exit(0);
+
+child_fail:
+		/* Shared failure exit: the parent sees a non-zero status */
+		close(pipefd[1]);
+		exit(1);
+	}
+
+	/*
+	 * Parent: pick up the child's flag. If the child bailed out
+	 * before writing, the flag keeps its initial value (false).
+	 */
+	close(pipefd[1]);
+	read(pipefd[0], &found_ours, sizeof(found_ours));
+	close(pipefd[0]);
+
+	/* The child must have exited normally with status 0 */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* The global user namespace listing contained the child's own one */
+	ASSERT_TRUE(found_ours);
+	TH_LOG("Process can see user namespace it has CAP_SYS_ADMIN inside of");
+}
+
+/*
+ * Test that dropping CAP_SYS_ADMIN restricts what we can see: a child
+ * lists network namespaces before and after clearing the capability
+ * and the second count must not exceed the first.
+ */
+TEST(listns_drop_cap_sys_admin)
+{
+	cap_value_t cap_list[1] = { CAP_SYS_ADMIN };
+	ssize_t count_before = 0, count_after = 0;
+	cap_flag_value_t cap_val;
+	bool correct = false;
+	int pipefd[2];
+	int status;
+	cap_t caps;
+	pid_t pid;
+
+	/* This test needs to start with CAP_SYS_ADMIN */
+	caps = cap_get_proc();
+	if (!caps)
+		SKIP(return, "Cannot get capabilities");
+
+	if (cap_get_flag(caps, CAP_SYS_ADMIN, CAP_EFFECTIVE, &cap_val) < 0) {
+		cap_free(caps);
+		SKIP(return, "Cannot check CAP_SYS_ADMIN");
+	}
+
+	/* cap_val is a plain copy, so the handle can go before the check */
+	cap_free(caps);
+	if (cap_val != CAP_SET)
+		SKIP(return, "Test needs CAP_SYS_ADMIN to start");
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		struct ns_id_req req = {
+			.size = sizeof(req),
+			.spare = 0,
+			.ns_id = 0,
+			.ns_type = CLONE_NEWNET,
+			.spare2 = 0,
+			.user_ns_id = LISTNS_CURRENT_USER,
+		};
+		__u64 ns_ids_before[100];
+		__u64 ns_ids_after[100];
+
+		close(pipefd[0]);
+
+		/* Create user namespace */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		/* Count namespaces with CAP_SYS_ADMIN */
+		count_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+
+		/*
+		 * Drop CAP_SYS_ADMIN from the effective and permitted
+		 * sets. This is best effort: no result is checked.
+		 */
+		caps = cap_get_proc();
+		if (caps) {
+			cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR);
+			cap_set_flag(caps, CAP_PERMITTED, 1, cap_list, CAP_CLEAR);
+			cap_set_proc(caps);
+			cap_free(caps);
+		}
+
+		/* Ensure we can't regain the capability */
+		prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+
+		/* Count namespaces without CAP_SYS_ADMIN */
+		count_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+
+		/* Without CAP_SYS_ADMIN, we should see same or fewer namespaces */
+		correct = !(count_after > count_before);
+
+		write(pipefd[1], &correct, sizeof(correct));
+		write(pipefd[1], &count_before, sizeof(count_before));
+		write(pipefd[1], &count_after, sizeof(count_after));
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	/*
+	 * Parent: pick up the verdict and both counts. If the child
+	 * bailed out before writing, all three keep their initial values.
+	 */
+	close(pipefd[1]);
+	read(pipefd[0], &correct, sizeof(correct));
+	read(pipefd[0], &count_before, sizeof(count_before));
+	read(pipefd[0], &count_after, sizeof(count_after));
+	close(pipefd[0]);
+
+	/* The child must have exited normally with status 0 */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* The count after the drop did not exceed the count before it */
+	ASSERT_TRUE(correct);
+	TH_LOG("With CAP_SYS_ADMIN: %zd namespaces, without: %zd namespaces",
+			count_before, count_after);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_test.c b/tools/testing/selftests/namespaces/listns_test.c
new file mode 100644
index 000000000000..8a95789d6a87
--- /dev/null
+++ b/tools/testing/selftests/namespaces/listns_test.c
@@ -0,0 +1,679 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Test basic listns() functionality with the unified namespace tree.
+ * Lists namespaces of every type globally and checks the returned IDs.
+ */
+TEST(listns_basic_unified)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = 0,  /* All types */
+		.spare2 = 0,
+		.user_ns_id = 0,  /* Global listing */
+	};
+	__u64 ns_ids[100];	/* at most ARRAY_SIZE(ns_ids) IDs are requested */
+	ssize_t ret;
+
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {	/* ENOSYS skips the test; any other error fails it */
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+		ASSERT_TRUE(false);
+	}
+
+	/* Should find at least the initial namespaces */
+	ASSERT_GT(ret, 0);
+	TH_LOG("Found %zd active namespaces", ret);
+
+	/* ret is used as the number of IDs returned: each one must be non-zero */
+	for (ssize_t i = 0; i < ret; i++) {
+		ASSERT_NE(ns_ids[i], 0);
+		TH_LOG("  [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+	}
+}
+
+/*
+ * Test listns() with type filtering.
+ * List only network namespaces and spot-check a few by reopening them.
+ */
+TEST(listns_filter_by_type)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWNET,  /* Only network namespaces */
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[100];
+	ssize_t ret;
+
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+		ASSERT_TRUE(false);
+	}
+	ASSERT_GE(ret, 0);	/* ret < 0 was already handled above */
+
+	/* Should find at least init_net */
+	ASSERT_GT(ret, 0);
+	TH_LOG("Found %zd active network namespaces", ret);
+
+	/* For at most the first 5 IDs: try to open the namespace and check its type */
+	for (ssize_t i = 0; i < ret && i < 5; i++) {
+		struct nsfs_file_handle nsfh = {
+			.ns_id = ns_ids[i],
+			.ns_type = CLONE_NEWNET,
+			.ns_inum = 0,
+		};
+		struct file_handle *fh;
+		int fd;
+
+		fh = (struct file_handle *)malloc(sizeof(*fh) + sizeof(nsfh));
+		ASSERT_NE(fh, NULL);
+		fh->handle_bytes = sizeof(nsfh);
+		fh->handle_type = 0;
+		memcpy(fh->f_handle, &nsfh, sizeof(nsfh));
+
+		/* NOTE(review): -10003 is presumably FD_NSFS_ROOT - confirm and prefer the macro */
+		fd = open_by_handle_at(-10003, fh, O_RDONLY);
+		free(fh);
+
+		if (fd >= 0) {	/* a failed open is tolerated, not asserted */
+			int ns_type;
+			/* Verify it's a network namespace via ioctl */
+			ns_type = ioctl(fd, NS_GET_NSTYPE);
+			if (ns_type >= 0) {
+				ASSERT_EQ(ns_type, CLONE_NEWNET);
+			}
+			close(fd);
+		}
+	}
+}
+
+/*
+ * Test listns() pagination.
+ * List namespaces in two batches of two, resuming from the last ID seen.
+ */
+TEST(listns_pagination)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = 0,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 batch1[2], batch2[2];
+	ssize_t ret1, ret2;
+
+	/* Get first batch */
+	ret1 = sys_listns(&req, batch1, ARRAY_SIZE(batch1), 0);
+	if (ret1 < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+		ASSERT_TRUE(false);
+	}
+	ASSERT_GE(ret1, 0);
+
+	if (ret1 == 0)
+		SKIP(return, "No namespaces found");
+
+	TH_LOG("First batch: %zd namespaces", ret1);
+
+	/* A full first batch means there may be more: resume after its last ID */
+	if (ret1 == ARRAY_SIZE(batch1)) {
+		req.ns_id = batch1[ret1 - 1];
+		ret2 = sys_listns(&req, batch2, ARRAY_SIZE(batch2), 0);
+		ASSERT_GE(ret2, 0);
+
+		TH_LOG("Second batch: %zd namespaces (after ns_id=%llu)",
+		       ret2, (unsigned long long)req.ns_id);
+
+		/* If we got more results, the second batch must start after the first batch's last ID */
+		if (ret2 > 0) {
+			ASSERT_GT(batch2[0], batch1[ret1 - 1]);
+			TH_LOG("Pagination working: %llu > %llu",
+			       (unsigned long long)batch2[0],
+			       (unsigned long long)batch1[ret1 - 1]);
+		}
+	} else {
+		TH_LOG("All namespaces fit in first batch");
+	}
+}
+
+/*
+ * Test listns() with LISTNS_CURRENT_USER.
+ * List namespaces owned by current user namespace (results are only logged).
+ */
+TEST(listns_current_user)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = 0,
+		.spare2 = 0,
+		.user_ns_id = LISTNS_CURRENT_USER,
+	};
+	__u64 ns_ids[100];
+	ssize_t ret;
+
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+		ASSERT_TRUE(false);
+	}
+	ASSERT_GE(ret, 0);
+
+	/* Expected to include the initial namespaces in init_user_ns; logged, not asserted */
+	TH_LOG("Found %zd namespaces owned by current user namespace", ret);
+
+	for (ssize_t i = 0; i < ret; i++)
+		TH_LOG("  [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+}
+
+/*
+ * Test that listns() only returns active namespaces.
+ * Create a namespace in a child, let the child exit, verify it's not listed.
+ */
+TEST(listns_only_active)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWNET,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids_before[100], ns_ids_after[100];
+	ssize_t ret_before, ret_after;
+	int pipefd[2];
+	pid_t pid;
+	__u64 new_ns_id = 0;	/* stays 0 if the child never reports an ID */
+	int status;
+
+	/* Get initial list */
+	ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+	if (ret_before < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+		ASSERT_TRUE(false);
+	}
+	ASSERT_GE(ret_before, 0);
+
+	TH_LOG("Before: %zd active network namespaces", ret_before);
+
+	/* Create a new namespace in a child process and get its ID */
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		int fd;
+		__u64 ns_id;
+
+		close(pipefd[0]);
+
+		/* Create new network namespace */
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		/* Get its ID */
+		fd = open("/proc/self/ns/net", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &ns_id) < 0) {
+			close(fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(fd);
+
+		/* Send ID to parent */
+		write(pipefd[1], &ns_id, sizeof(ns_id));
+		close(pipefd[1]);
+
+		/* Keep namespace active briefly (100ms) so the parent can list it */
+		usleep(100000);
+		exit(0);
+	}
+
+	/* Parent reads the new namespace ID */
+	{
+		int bytes;
+
+		close(pipefd[1]);
+		bytes = read(pipefd[0], &new_ns_id, sizeof(new_ns_id));
+		close(pipefd[0]);
+
+		if (bytes == sizeof(new_ns_id)) {
+			__u64 ns_ids_during[100];
+			int ret_during;
+
+			TH_LOG("Child created namespace with ID %llu", (unsigned long long)new_ns_id);
+
+			/* List namespaces while child is still alive - should see new one */
+			ret_during = sys_listns(&req, ns_ids_during, ARRAY_SIZE(ns_ids_during), 0);
+			ASSERT_GE(ret_during, 0);
+			TH_LOG("During: %d active network namespaces", ret_during);
+
+			/* Should have at least as many namespaces as before */
+			ASSERT_GE(ret_during, ret_before);
+		}
+	}
+
+	/* Wait for child to exit */
+	waitpid(pid, &status, 0);
+
+	/* Give time for namespace to become inactive */
+	usleep(100000);
+
+	/* List namespaces after child exits - should not see new one */
+	ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+	ASSERT_GE(ret_after, 0);
+	TH_LOG("After: %zd active network namespaces", ret_after);
+
+	/* Verify the new namespace ID is not in the after list (skipped if no ID was received) */
+	if (new_ns_id != 0) {
+		bool found = false;
+
+		for (ssize_t i = 0; i < ret_after; i++) {
+			if (ns_ids_after[i] == new_ns_id) {
+				found = true;
+				break;
+			}
+		}
+		ASSERT_FALSE(found);
+	}
+}
+
+/*
+ * Test listns() with a specific user namespace ID: a child creates a user
+ * namespace plus net/UTS namespaces in it; the parent lists what it owns.
+ */
+TEST(listns_specific_userns)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = 0,
+		.spare2 = 0,
+		.user_ns_id = 0,  /* Will be filled with created userns ID */
+	};
+	__u64 ns_ids[100];
+	int sv[2];
+	pid_t pid;
+	int status;
+	__u64 user_ns_id = 0;
+	int bytes, saved_errno;
+	ssize_t ret;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		int fd;
+		__u64 ns_id;
+		char buf;
+
+		close(sv[0]);
+
+		/* Create new user namespace */
+		if (setup_userns() < 0) {
+			close(sv[1]);
+			exit(1);
+		}
+
+		/* Get user namespace ID */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			close(sv[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &ns_id) < 0) {
+			close(fd);
+			close(sv[1]);
+			exit(1);
+		}
+		close(fd);
+
+		/* Create the owned namespaces first so they exist when the parent lists */
+		unshare(CLONE_NEWNET);
+		unshare(CLONE_NEWUTS);
+
+		/* Send ID to parent */
+		if (write(sv[1], &ns_id, sizeof(ns_id)) != sizeof(ns_id)) {
+			close(sv[1]);
+			exit(1);
+		}
+
+		/* Wait for parent signal */
+		if (read(sv[1], &buf, 1) != 1) {
+			close(sv[1]);
+			exit(1);
+		}
+		close(sv[1]);
+		exit(0);
+	}
+
+	/* Parent */
+	close(sv[1]);
+	bytes = read(sv[0], &user_ns_id, sizeof(user_ns_id));
+
+	if (bytes != sizeof(user_ns_id)) {
+		close(sv[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to get user namespace ID from child");
+	}
+
+	TH_LOG("Child created user namespace with ID %llu", (unsigned long long)user_ns_id);
+
+	/* List namespaces owned by this user namespace */
+	req.user_ns_id = user_ns_id;
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+	if (ret < 0) {
+		saved_errno = errno;	/* cleanup calls below may overwrite errno */
+		TH_LOG("listns failed: %s (errno=%d)", strerror(saved_errno), saved_errno);
+		close(sv[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		if (saved_errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret, 0);
+	}
+
+	TH_LOG("Found %zd namespaces owned by user namespace %llu", ret,
+	       (unsigned long long)user_ns_id);
+
+	/* Should find at least the network and UTS namespaces we created (logged only) */
+	if (ret > 0) {
+		for (ssize_t i = 0; i < ret && i < 10; i++)
+			TH_LOG("  [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+	}
+
+	/* Signal child to exit */
+	if (write(sv[0], "X", 1) != 1) {
+		close(sv[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		ASSERT_TRUE(false);
+	}
+	close(sv[0]);
+	waitpid(pid, &status, 0);
+}
+
+/*
+ * Test listns() with a filter that ORs several namespace types together.
+ */
+TEST(listns_multiple_types)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWNET | CLONE_NEWUTS,  /* Network and UTS */
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[100];
+	ssize_t ret;
+
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
+		ASSERT_TRUE(false);
+	}
+	ASSERT_GE(ret, 0);
+
+	TH_LOG("Found %zd active network/UTS namespaces", ret);
+
+	for (ssize_t i = 0; i < ret; i++)	/* IDs are only logged, not checked */
+		TH_LOG("  [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
+}
+
+/*
+ * Test that hierarchical active reference propagation keeps parent
+ * user namespaces visible in listns() while a nested child is in use.
+ */
+TEST(listns_hierarchical_visibility)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 parent_ns_id = 0, child_ns_id = 0;
+	int sv[2];
+	pid_t pid;
+	int status;
+	int bytes;
+	__u64 ns_ids[100];
+	ssize_t ret;
+	bool found_parent, found_child;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		int fd;
+		char buf;
+
+		close(sv[0]);
+
+		/* Create parent user namespace */
+		if (setup_userns() < 0) {
+			close(sv[1]);
+			exit(1);
+		}
+
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			close(sv[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &parent_ns_id) < 0) {
+			close(fd);
+			close(sv[1]);
+			exit(1);
+		}
+		close(fd);
+
+		/* Create child user namespace (second setup_userns() call in the same process) */
+		if (setup_userns() < 0) {
+			close(sv[1]);
+			exit(1);
+		}
+
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			close(sv[1]);
+			exit(1);
+		}
+
+		if (ioctl(fd, NS_GET_ID, &child_ns_id) < 0) {
+			close(fd);
+			close(sv[1]);
+			exit(1);
+		}
+		close(fd);
+
+		/* Send both IDs to parent */
+		if (write(sv[1], &parent_ns_id, sizeof(parent_ns_id)) != sizeof(parent_ns_id)) {
+			close(sv[1]);
+			exit(1);
+		}
+		if (write(sv[1], &child_ns_id, sizeof(child_ns_id)) != sizeof(child_ns_id)) {
+			close(sv[1]);
+			exit(1);
+		}
+
+		/* Wait for parent signal (stay alive so the namespaces remain in use) */
+		if (read(sv[1], &buf, 1) != 1) {
+			close(sv[1]);
+			exit(1);
+		}
+		close(sv[1]);
+		exit(0);
+	}
+
+	/* Parent */
+	close(sv[1]);
+
+	/* Read both namespace IDs; the two byte counts are summed and checked together */
+	bytes = read(sv[0], &parent_ns_id, sizeof(parent_ns_id));
+	bytes += read(sv[0], &child_ns_id, sizeof(child_ns_id));
+
+	if (bytes != (int)(2 * sizeof(__u64))) {
+		close(sv[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to get namespace IDs from child");
+	}
+
+	TH_LOG("Parent user namespace ID: %llu", (unsigned long long)parent_ns_id);
+	TH_LOG("Child user namespace ID: %llu", (unsigned long long)child_ns_id);
+
+	/* List all user namespaces (at most ARRAY_SIZE(ns_ids) of them) */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+
+	if (ret < 0 && errno == ENOSYS) {
+		close(sv[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "listns() not supported");
+	}
+
+	ASSERT_GE(ret, 0);
+	TH_LOG("Found %zd active user namespaces", ret);
+
+	/* Both parent and child should be visible (active due to child process) */
+	found_parent = false;
+	found_child = false;
+	for (ssize_t i = 0; i < ret; i++) {
+		if (ns_ids[i] == parent_ns_id)
+			found_parent = true;
+		if (ns_ids[i] == child_ns_id)
+			found_child = true;
+	}
+
+	TH_LOG("Parent namespace %s, child namespace %s",
+	       found_parent ? "found" : "NOT FOUND",
+	       found_child ? "found" : "NOT FOUND");
+
+	ASSERT_TRUE(found_child);
+	/* With hierarchical propagation, parent should also be active */
+	ASSERT_TRUE(found_parent);
+
+	/* Signal child to exit */
+	if (write(sv[0], "X", 1) != 1) {
+		close(sv[0]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		ASSERT_TRUE(false);
+	}
+	close(sv[0]);
+	waitpid(pid, &status, 0);
+}
+
+/*
+ * Test error cases for listns(): bad flags, NULL buffer, non-zero spare, huge count.
+ */
+TEST(listns_error_cases)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = 0,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[10];
+	ssize_t ret;
+
+	/* Test with invalid flags */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0xFFFF);
+	if (ret < 0 && errno == ENOSYS) {	/* errno is only meaningful on failure */
+		/* listns() not supported, skip this check */
+	} else {
+		ASSERT_LT(ret, 0);
+		ASSERT_EQ(errno, EINVAL);
+	}
+
+	/* Test with NULL ns_ids array */
+	ret = sys_listns(&req, NULL, 10, 0);
+	ASSERT_LT(ret, 0);
+
+	/* Test with invalid spare field */
+	req.spare = 1;
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0 && errno == ENOSYS) {
+		/* listns() not supported, skip this check */
+	} else {
+		ASSERT_LT(ret, 0);
+		ASSERT_EQ(errno, EINVAL);
+	}
+	req.spare = 0;
+
+	/* Test with huge nr_ns_ids (far larger than the real buffer) */
+	ret = sys_listns(&req, ns_ids, 2000000, 0);
+	if (ret < 0 && errno == ENOSYS) {
+		/* listns() not supported, skip this check */
+	} else {
+		ASSERT_LT(ret, 0);
+	}
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/ns_active_ref_test.c b/tools/testing/selftests/namespaces/ns_active_ref_test.c
new file mode 100644
index 000000000000..093268f0efaa
--- /dev/null
+++ b/tools/testing/selftests/namespaces/ns_active_ref_test.c
@@ -0,0 +1,2672 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/nsfs.h>
+#include <sys/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <pthread.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+#ifndef FD_NSFS_ROOT /* fallback when the included headers do not define it */
+#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */
+#endif
+
+#ifndef FILEID_NSFS /* fallback when the included headers do not define it */
+#define FILEID_NSFS 0xf1 /* NOTE(review): presumably the nsfs file handle type - confirm */
+#endif
+
+/*
+ * Test that initial namespaces can be reopened via file handle.
+ * Initial namespaces should have active ref count of 1 from boot.
+ */
+TEST(init_ns_always_active)
+{
+	struct file_handle *handle;
+	int mount_id;
+	int ret;
+	int fd1, fd2;
+	struct stat st1, st2;
+
+	handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+	ASSERT_NE(handle, NULL);
+
+	/* Open initial network namespace (via PID 1's net namespace link) */
+	fd1 = open("/proc/1/ns/net", O_RDONLY);
+	ASSERT_GE(fd1, 0);
+
+	/* Get file handle for initial namespace */
+	handle->handle_bytes = MAX_HANDLE_SZ;	/* payload capacity allocated above */
+	ret = name_to_handle_at(fd1, "", handle, &mount_id, AT_EMPTY_PATH);
+	if (ret < 0 && errno == EOPNOTSUPP) {
+		SKIP(free(handle); close(fd1);
+		     return, "nsfs doesn't support file handles");
+	}
+	ASSERT_EQ(ret, 0);
+
+	/* Close the namespace fd; only the file handle is kept */
+	close(fd1);
+
+	/* Try to reopen via file handle - should succeed since init ns is always active */
+	fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	if (fd2 < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+		SKIP(free(handle);
+		     return, "open_by_handle_at with FD_NSFS_ROOT not supported");
+	}
+	ASSERT_GE(fd2, 0);
+
+	/* Verify we opened the same namespace by comparing inode numbers */
+	fd1 = open("/proc/1/ns/net", O_RDONLY);
+	ASSERT_GE(fd1, 0);
+	ASSERT_EQ(fstat(fd1, &st1), 0);
+	ASSERT_EQ(fstat(fd2, &st2), 0);
+	ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+	close(fd1);
+	close(fd2);
+	free(handle);
+}
+
+/*
+ * Test namespace lifecycle: create a namespace in a child process,
+ * get a file handle while it's active, then try to reopen after
+ * the process exits (namespace becomes inactive).
+ */
+TEST(ns_inactive_after_exit)
+{
+	struct file_handle *handle;
+	int mount_id;
+	int ret;
+	int fd;
+	int pipefd[2];
+	pid_t pid;
+	int status;
+	char buf[sizeof(*handle) + MAX_HANDLE_SZ];	/* file_handle header + payload */
+
+	/* Create pipe for passing file handle from child */
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child process */
+		close(pipefd[0]);
+
+		/* Create new network namespace */
+		ret = unshare(CLONE_NEWNET);
+		if (ret < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		/* Open our new namespace */
+		fd = open("/proc/self/ns/net", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		/* Get file handle for the namespace */
+		handle = (struct file_handle *)buf;
+		handle->handle_bytes = MAX_HANDLE_SZ;
+		ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+		close(fd);
+
+		if (ret < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		/* Send handle to parent (header plus the bytes actually used) */
+		write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+		close(pipefd[1]);
+
+		/* Exit - namespace should become inactive */
+		exit(0);
+	}
+
+	/* Parent process */
+	close(pipefd[1]);
+
+	/* Read file handle from child (ret is checked after the child is reaped) */
+	ret = read(pipefd[0], buf, sizeof(buf));
+	close(pipefd[0]);
+
+	/* Wait for child to exit */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	ASSERT_GT(ret, 0);
+	handle = (struct file_handle *)buf;
+
+	/* Try to reopen namespace - should fail since it's inactive */
+	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_LT(fd, 0);
+	/* Should fail with ENOENT (namespace inactive) or ESTALE */
+	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test that a namespace remains active while a process is using it,
+ * even after the creating process exits.
+ */
+TEST(ns_active_with_multiple_processes)
+{
+	struct file_handle *handle;
+	int mount_id;
+	int ret;
+	int fd;
+	int pipefd[2];	/* first child -> parent: file handle */
+	int syncpipe[2];	/* parent -> first child: "you may exit" byte */
+	pid_t pid1, pid2;
+	int status;
+	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+	char sync_byte;
+
+	/* Create pipes for communication */
+	ASSERT_EQ(pipe(pipefd), 0);
+	ASSERT_EQ(pipe(syncpipe), 0);
+
+	pid1 = fork();
+	ASSERT_GE(pid1, 0);
+
+	if (pid1 == 0) {
+		/* First child - creates namespace */
+		close(pipefd[0]);
+		close(syncpipe[1]);
+
+		/* Create new network namespace */
+		ret = unshare(CLONE_NEWNET);
+		if (ret < 0) {
+			close(pipefd[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+
+		/* Open and get handle */
+		fd = open("/proc/self/ns/net", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+
+		handle = (struct file_handle *)buf;
+		handle->handle_bytes = MAX_HANDLE_SZ;
+		ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+		close(fd);
+
+		if (ret < 0) {
+			close(pipefd[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+
+		/* Send handle to parent */
+		write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+		close(pipefd[1]);
+
+		/* Wait for signal before exiting */
+		read(syncpipe[0], &sync_byte, 1);
+		close(syncpipe[0]);
+		exit(0);
+	}
+
+	/* Parent reads handle */
+	close(pipefd[1]);
+	ret = read(pipefd[0], buf, sizeof(buf));
+	close(pipefd[0]);
+	ASSERT_GT(ret, 0);
+
+	handle = (struct file_handle *)buf;
+
+	/* Create second child that will keep namespace active */
+	pid2 = fork();
+	ASSERT_GE(pid2, 0);
+
+	if (pid2 == 0) {
+		/* Second child - reopens the namespace */
+		close(syncpipe[0]);
+		close(syncpipe[1]);
+
+		/* Open the namespace via handle */
+		fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+		if (fd < 0) {
+			exit(1);
+		}
+
+		/* Join the namespace */
+		ret = setns(fd, CLONE_NEWNET);
+		close(fd);
+		if (ret < 0) {
+			exit(1);
+		}
+
+		/* Sleep (1s) to keep namespace active while the parent checks it */
+		sleep(1);
+		exit(0);
+	}
+
+	/* Let second child enter the namespace (timing-based, no explicit handshake) */
+	usleep(100000); /* 100ms */
+
+	/* Signal first child to exit */
+	close(syncpipe[0]);
+	sync_byte = 'X';
+	write(syncpipe[1], &sync_byte, 1);
+	close(syncpipe[1]);
+
+	/* Wait for first child */
+	waitpid(pid1, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+
+	/* Namespace should still be active because second child is using it */
+	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_GE(fd, 0);
+	close(fd);
+
+	/* Wait for second child (only WIFEXITED is checked, not its exit code) */
+	waitpid(pid2, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+}
+
+/*
+ * Test user namespace active ref tracking via credential lifecycle
+ */
+TEST(userns_active_ref_lifecycle)
+{
+	struct file_handle *handle;
+	int mount_id;
+	int ret;
+	int fd;
+	int pipefd[2];
+	pid_t pid;
+	int status;
+	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child process */
+		close(pipefd[0]);
+
+		/* Create new user namespace */
+		ret = unshare(CLONE_NEWUSER);
+		if (ret < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		/* Set up uid/gid mappings (best effort: skipped if any file fails to open) */
+		int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+		int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+		int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+		if (uid_map_fd >= 0 && gid_map_fd >= 0 && setgroups_fd >= 0) {
+			write(setgroups_fd, "deny", 4);	/* written before gid_map */
+			close(setgroups_fd);
+
+			char mapping[64];
+			snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+			write(uid_map_fd, mapping, strlen(mapping));
+			close(uid_map_fd);
+
+			snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+			write(gid_map_fd, mapping, strlen(mapping));
+			close(gid_map_fd);
+		}
+
+		/* Get file handle */
+		fd = open("/proc/self/ns/user", O_RDONLY);
+		if (fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		handle = (struct file_handle *)buf;
+		handle->handle_bytes = MAX_HANDLE_SZ;
+		ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+		close(fd);
+
+		if (ret < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		/* Send handle to parent, then exit so no task uses the namespace */
+		write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	/* Parent */
+	close(pipefd[1]);
+	ret = read(pipefd[0], buf, sizeof(buf));
+	close(pipefd[0]);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	ASSERT_GT(ret, 0);	/* the handle must actually have been received */
+	handle = (struct file_handle *)buf;
+
+	/* Namespace should be inactive after all tasks exit */
+	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_LT(fd, 0);
+	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test PID namespace active ref tracking
+ */
+TEST(pidns_active_ref_lifecycle)
+{
+	struct file_handle *handle;
+	int mount_id;
+	int ret;
+	int fd;
+	int pipefd[2];
+	pid_t pid;
+	int status;
+	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child process */
+		close(pipefd[0]);
+
+		/* Create new PID namespace */
+		ret = unshare(CLONE_NEWPID);
+		if (ret < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		/* Fork to actually enter the PID namespace */
+		pid_t child = fork();
+		if (child < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		if (child == 0) {
+			/* Grandchild - in new PID namespace */
+			fd = open("/proc/self/ns/pid", O_RDONLY);
+			if (fd < 0) {
+				exit(1);
+			}
+
+			handle = (struct file_handle *)buf;
+			handle->handle_bytes = MAX_HANDLE_SZ;
+			ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+			close(fd);
+
+			if (ret < 0) {
+				exit(1);
+			}
+
+			/* Send handle to grandparent (the pipe's write end was inherited) */
+			write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+			close(pipefd[1]);
+			exit(0);
+		}
+
+		/* Wait for grandchild (its exit status is not checked here) */
+		waitpid(child, NULL, 0);
+		exit(0);
+	}
+
+	/* Parent */
+	close(pipefd[1]);
+	ret = read(pipefd[0], buf, sizeof(buf));
+	close(pipefd[0]);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	ASSERT_GT(ret, 0);	/* fails if the grandchild never sent a handle */
+	handle = (struct file_handle *)buf;
+
+	/* Namespace should be inactive after all processes exit */
+	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_LT(fd, 0);
+	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test that an open file descriptor keeps a namespace active.
+ * Even after the creating process exits, the namespace should remain
+ * active as long as an fd is held open.
+ */
+TEST(ns_fd_keeps_active)
+{
+	struct file_handle *handle;
+	int mount_id;
+	int ret;
+	int nsfd;
+	int pipe_child_ready[2];	/* child -> parent: file handle */
+	int pipe_parent_ready[2];	/* parent -> child: "fd is open" byte */
+	pid_t pid;
+	int status;
+	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+	char sync_byte;
+	char proc_path[64];
+
+	ASSERT_EQ(pipe(pipe_child_ready), 0);
+	ASSERT_EQ(pipe(pipe_parent_ready), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child process */
+		close(pipe_child_ready[0]);
+		close(pipe_parent_ready[1]);
+
+		TH_LOG("Child: creating new network namespace");
+
+		/* Create new network namespace */
+		ret = unshare(CLONE_NEWNET);
+		if (ret < 0) {
+			TH_LOG("Child: unshare(CLONE_NEWNET) failed: %s", strerror(errno));
+			close(pipe_child_ready[1]);
+			close(pipe_parent_ready[0]);
+			exit(1);
+		}
+
+		TH_LOG("Child: network namespace created successfully");
+
+		/* Get file handle for the namespace */
+		nsfd = open("/proc/self/ns/net", O_RDONLY);
+		if (nsfd < 0) {
+			TH_LOG("Child: failed to open /proc/self/ns/net: %s", strerror(errno));
+			close(pipe_child_ready[1]);
+			close(pipe_parent_ready[0]);
+			exit(1);
+		}
+
+		TH_LOG("Child: opened namespace fd %d", nsfd);
+
+		handle = (struct file_handle *)buf;
+		handle->handle_bytes = MAX_HANDLE_SZ;
+		ret = name_to_handle_at(nsfd, "", handle, &mount_id, AT_EMPTY_PATH);
+		close(nsfd);
+
+		if (ret < 0) {
+			TH_LOG("Child: name_to_handle_at failed: %s", strerror(errno));
+			close(pipe_child_ready[1]);
+			close(pipe_parent_ready[0]);
+			exit(1);
+		}
+
+		TH_LOG("Child: got file handle (bytes=%u)", handle->handle_bytes);
+
+		/* Send file handle to parent */
+		ret = write(pipe_child_ready[1], buf, sizeof(*handle) + handle->handle_bytes);
+		TH_LOG("Child: sent %d bytes of file handle to parent", ret);
+		close(pipe_child_ready[1]);
+
+		/* Wait for parent to open the fd */
+		TH_LOG("Child: waiting for parent to open fd");
+		ret = read(pipe_parent_ready[0], &sync_byte, 1);
+		close(pipe_parent_ready[0]);
+
+		TH_LOG("Child: parent signaled (read %d bytes), exiting now", ret);
+		/* Exit - namespace should stay active because parent holds fd */
+		exit(0);
+	}
+
+	/* Parent process */
+	close(pipe_child_ready[1]);
+	close(pipe_parent_ready[0]);
+
+	TH_LOG("Parent: reading file handle from child");
+
+	/* Read file handle from child */
+	ret = read(pipe_child_ready[0], buf, sizeof(buf));
+	close(pipe_child_ready[0]);
+	ASSERT_GT(ret, 0);
+	handle = (struct file_handle *)buf;
+
+	TH_LOG("Parent: received %d bytes, handle size=%u", ret, handle->handle_bytes);
+
+	/* Open the child's namespace while it's still alive */
+	snprintf(proc_path, sizeof(proc_path), "/proc/%d/ns/net", pid);
+	TH_LOG("Parent: opening child's namespace at %s", proc_path);
+	nsfd = open(proc_path, O_RDONLY);
+	if (nsfd < 0) {
+		TH_LOG("Parent: failed to open %s: %s", proc_path, strerror(errno));
+		close(pipe_parent_ready[1]);
+		kill(pid, SIGKILL);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to open child's namespace");
+	}
+
+	TH_LOG("Parent: opened child's namespace, got fd %d", nsfd);
+
+	/* Signal child that we have the fd */
+	sync_byte = 'G';
+	write(pipe_parent_ready[1], &sync_byte, 1);
+	close(pipe_parent_ready[1]);
+	TH_LOG("Parent: signaled child that we have the fd");
+
+	/* Wait for child to exit */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	TH_LOG("Child exited, parent holds fd %d to namespace", nsfd);
+
+	/*
+	 * Namespace should still be ACTIVE because we hold an fd.
+	 * We should be able to reopen it via file handle.
+	 */
+	TH_LOG("Attempting to reopen namespace via file handle (should succeed - fd held)");
+	int fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_GE(fd2, 0);
+	TH_LOG("Successfully reopened namespace via file handle, got fd %d", fd2);
+
+	/* Verify it's the same namespace (ino_t width varies by ABI, so cast for printing) */
+	struct stat st1, st2;
+	ASSERT_EQ(fstat(nsfd, &st1), 0);
+	ASSERT_EQ(fstat(fd2, &st2), 0);
+	TH_LOG("Namespace inodes: nsfd=%llu, fd2=%llu",
+	       (unsigned long long)st1.st_ino, (unsigned long long)st2.st_ino);
+	ASSERT_EQ(st1.st_ino, st2.st_ino);
+	close(fd2);
+
+	/* Now close the fd - namespace should become inactive */
+	TH_LOG("Closing fd %d - namespace should become inactive", nsfd);
+	close(nsfd);
+
+	/* Now reopening should fail - namespace is inactive */
+	TH_LOG("Attempting to reopen namespace via file handle (should fail - inactive)");
+	fd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_LT(fd2, 0);
+	/* Should fail with ENOENT (inactive) or ESTALE (gone) */
+	TH_LOG("Reopen failed as expected: %s (errno=%d)", strerror(errno), errno);
+	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test hierarchical active reference propagation.
+ * When a child namespace is active, its owning user namespace should also
+ * be active automatically due to hierarchical active reference propagation.
+ * This ensures parents are always reachable when children are active.
+ */
+TEST(ns_parent_always_reachable)
+{
+	struct file_handle *parent_handle, *child_handle;
+	int ret, child_nsfd, saved_errno;
+	int pipefd[2];
+	pid_t pid;
+	int status;
+	__u64 parent_id, child_id;
+	char parent_buf[sizeof(*parent_handle) + MAX_HANDLE_SZ];
+	char child_buf[sizeof(*child_handle) + MAX_HANDLE_SZ];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child process */
+		close(pipefd[0]);
+
+		TH_LOG("Child: creating parent user namespace and setting up mappings");
+
+		/* Create parent user namespace with mappings */
+		ret = setup_userns();
+		if (ret < 0) {
+			TH_LOG("Child: setup_userns() for parent failed: %s", strerror(errno));
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		TH_LOG("Child: parent user namespace created, now uid=%d gid=%d", getuid(), getgid());
+
+		/* Get namespace ID for parent user namespace */
+		int parent_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (parent_fd < 0) {
+			TH_LOG("Child: failed to open parent /proc/self/ns/user: %s", strerror(errno));
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		TH_LOG("Child: opened parent userns fd %d", parent_fd);
+
+		if (ioctl(parent_fd, NS_GET_ID, &parent_id) < 0) {
+			TH_LOG("Child: NS_GET_ID for parent failed: %s", strerror(errno));
+			close(parent_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(parent_fd);
+
+		TH_LOG("Child: got parent namespace ID %llu", (unsigned long long)parent_id);
+
+		/* Create child user namespace within parent */
+		TH_LOG("Child: creating nested child user namespace");
+		ret = setup_userns();
+		if (ret < 0) {
+			TH_LOG("Child: setup_userns() for child failed: %s", strerror(errno));
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		TH_LOG("Child: nested child user namespace created, uid=%d gid=%d", getuid(), getgid());
+
+		/* Get namespace ID for child user namespace */
+		int child_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (child_fd < 0) {
+			TH_LOG("Child: failed to open child /proc/self/ns/user: %s", strerror(errno));
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		TH_LOG("Child: opened child userns fd %d", child_fd);
+
+		if (ioctl(child_fd, NS_GET_ID, &child_id) < 0) {
+			TH_LOG("Child: NS_GET_ID for child failed: %s", strerror(errno));
+			close(child_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(child_fd);
+
+		TH_LOG("Child: got child namespace ID %llu", (unsigned long long)child_id);
+
+		/* Send both namespace IDs to parent (parent ID first, then child ID) */
+		TH_LOG("Child: sending both namespace IDs to parent");
+		write(pipefd[1], &parent_id, sizeof(parent_id));
+		write(pipefd[1], &child_id, sizeof(child_id));
+		close(pipefd[1]);
+
+		TH_LOG("Child: exiting - parent userns should become inactive");
+		/* Exit - parent user namespace should become inactive */
+		exit(0);
+	}
+
+	/* Parent process */
+	close(pipefd[1]);
+
+	TH_LOG("Parent: reading both namespace IDs from child");
+
+	/* Read both IDs in the order the child wrote them; skip on any short read */
+	ret = read(pipefd[0], &parent_id, sizeof(parent_id));
+	if (ret != sizeof(parent_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read parent namespace ID from child");
+	}
+
+	ret = read(pipefd[0], &child_id, sizeof(child_id));
+	close(pipefd[0]);
+	if (ret != sizeof(child_id)) {
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read child namespace ID from child");
+	}
+
+	TH_LOG("Parent: received parent_id=%llu, child_id=%llu",
+	       (unsigned long long)parent_id, (unsigned long long)child_id);
+
+	/* Build nsfs handles keyed by ns_id only; ns_type and ns_inum are left 0 */
+	parent_handle = (struct file_handle *)parent_buf;
+	parent_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	parent_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *parent_fh = (struct nsfs_file_handle *)parent_handle->f_handle;
+	parent_fh->ns_id = parent_id;
+	parent_fh->ns_type = 0;
+	parent_fh->ns_inum = 0;
+
+	child_handle = (struct file_handle *)child_buf;
+	child_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	child_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *child_fh = (struct nsfs_file_handle *)child_handle->f_handle;
+	child_fh->ns_id = child_id;
+	child_fh->ns_type = 0;
+	child_fh->ns_inum = 0;
+
+	TH_LOG("Parent: opening child namespace BEFORE child exits");
+
+	/* Hold an fd on the child ns. NOTE(review): not synchronized with child exit */
+	child_nsfd = open_by_handle_at(FD_NSFS_ROOT, child_handle, O_RDONLY);
+	if (child_nsfd < 0) {
+		TH_LOG("Failed to open child namespace: %s (errno=%d)", strerror(errno), errno);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to open child namespace");
+	}
+
+	TH_LOG("Opened child namespace fd %d", child_nsfd);
+
+	/* Now wait for child to exit */
+	TH_LOG("Parent: waiting for child to exit");
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	TH_LOG("Child process exited, parent holds fd to child namespace");
+
+	/*
+	 * With hierarchical active reference propagation:
+	 * Since the child namespace is active (parent process holds fd),
+	 * the parent user namespace should ALSO be active automatically.
+	 * This is because when we took an active reference on the child,
+	 * it propagated up to the owning user namespace.
+	 */
+	TH_LOG("Attempting to reopen parent namespace (should SUCCEED - hierarchical propagation)");
+	int parent_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+	ASSERT_GE(parent_fd, 0);
+
+	TH_LOG("SUCCESS: Parent namespace is active (fd=%d) due to active child", parent_fd);
+
+	/* Verify we can also get parent via NS_GET_USERNS */
+	TH_LOG("Verifying NS_GET_USERNS also works");
+	int parent_fd2 = ioctl(child_nsfd, NS_GET_USERNS);
+	if (parent_fd2 < 0) {
+		/* Log first: the close() calls below are allowed to overwrite errno */
+		TH_LOG("NS_GET_USERNS failed: %s (errno=%d)", strerror(errno), errno);
+		close(parent_fd);
+		close(child_nsfd);
+		SKIP(return, "NS_GET_USERNS not supported or failed");
+	}
+
+	TH_LOG("NS_GET_USERNS succeeded, got parent fd %d", parent_fd2);
+
+	/* Verify both methods give us the same namespace */
+	struct stat st1, st2;
+	ASSERT_EQ(fstat(parent_fd, &st1), 0);
+	ASSERT_EQ(fstat(parent_fd2, &st2), 0);
+	TH_LOG("Parent namespace inodes: parent_fd=%lu, parent_fd2=%lu", st1.st_ino, st2.st_ino);
+	ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+	/*
+	 * Close child fd - parent should remain active because we still
+	 * hold direct references to it (parent_fd and parent_fd2).
+	 */
+	TH_LOG("Closing child fd - parent should remain active (direct refs held)");
+	close(child_nsfd);
+
+	/* Parent should still be openable */
+	TH_LOG("Verifying parent still active via file handle");
+	int parent_fd3 = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+	ASSERT_GE(parent_fd3, 0);
+	close(parent_fd3);
+
+	TH_LOG("Closing all fds to parent namespace");
+	close(parent_fd);
+	close(parent_fd2);
+
+	/* With every fd closed the parent namespace should no longer be openable */
+	TH_LOG("Attempting to reopen parent (should fail - inactive, no refs)");
+	parent_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+	saved_errno = errno;	/* snapshot now: later calls (even logging) may overwrite errno */
+	ASSERT_LT(parent_fd, 0);
+	TH_LOG("Parent inactive as expected: %s (errno=%d)", strerror(saved_errno), saved_errno);
+	ASSERT_TRUE(saved_errno == ENOENT || saved_errno == ESTALE);
+}
+
+/*
+ * Test reopening a bind-mounted netns by file handle once its creator exited
+ */
+TEST(ns_bind_mount_keeps_in_tree)
+{
+	struct file_handle *handle;
+	int mount_id;
+	int ret;
+	int fd;
+	int pipefd[2];
+	pid_t pid;
+	int status;
+	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+	char tmpfile[] = "/tmp/ns-test-XXXXXX";
+	int tmpfd;
+
+	/* Create temporary file to serve as the bind mount target */
+	tmpfd = mkstemp(tmpfile);
+	if (tmpfd < 0) {
+		SKIP(return, "Cannot create temporary file");
+	}
+	close(tmpfd);
+
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child process: every failure path below also unlinks tmpfile */
+		close(pipefd[0]);
+
+		/* Unshare mount namespace and make mounts private to avoid propagation */
+		ret = unshare(CLONE_NEWNS);
+		if (ret < 0) {
+			close(pipefd[1]);
+			unlink(tmpfile);
+			exit(1);
+		}
+		ret = mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL);
+		if (ret < 0) {
+			close(pipefd[1]);
+			unlink(tmpfile);
+			exit(1);
+		}
+
+		/* Create new network namespace */
+		ret = unshare(CLONE_NEWNET);
+		if (ret < 0) {
+			close(pipefd[1]);
+			unlink(tmpfile);
+			exit(1);
+		}
+
+		/* Bind mount the new network namespace onto the temporary file */
+		ret = mount("/proc/self/ns/net", tmpfile, NULL, MS_BIND, NULL);
+		if (ret < 0) {
+			close(pipefd[1]);
+			unlink(tmpfile);
+			exit(1);
+		}
+
+		/* Get file handle for the network namespace */
+		fd = open("/proc/self/ns/net", O_RDONLY);
+		if (fd < 0) {
+			umount(tmpfile);
+			close(pipefd[1]);
+			unlink(tmpfile);
+			exit(1);
+		}
+
+		handle = (struct file_handle *)buf;
+		handle->handle_bytes = MAX_HANDLE_SZ;
+		ret = name_to_handle_at(fd, "", handle, &mount_id, AT_EMPTY_PATH);
+		close(fd);
+
+		if (ret < 0) {
+			umount(tmpfile);
+			close(pipefd[1]);
+			unlink(tmpfile);
+			exit(1);
+		}
+
+		/* Send the handle header plus handle_bytes of payload to the parent */
+		write(pipefd[1], buf, sizeof(*handle) + handle->handle_bytes);
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	/* Parent: read the handle, then reap the child before probing */
+	close(pipefd[1]);
+	ret = read(pipefd[0], buf, sizeof(buf));
+	close(pipefd[0]);
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	ASSERT_GT(ret, 0);
+	handle = (struct file_handle *)buf;
+
+	/*
+	 * Namespace should be inactive but still in tree due to bind mount.
+	 * Reopening should fail with ENOENT (inactive) not ESTALE (not in tree).
+	 */
+	fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_LT(fd, 0);
+	/* Both ENOENT and ESTALE are tolerated; any other errno is logged, not failed */
+	if (errno != ENOENT && errno != ESTALE) {
+		TH_LOG("Unexpected error: %d", errno);
+	}
+
+	/* Cleanup. NOTE(review): the bind mount was made in the child's private mntns */
+	umount(tmpfile);
+	unlink(tmpfile);
+}
+
+/*
+ * Test multi-level hierarchy (3+ levels deep).
+ * Grandparent → Parent → Child
+ * When child is active, both parent AND grandparent should be active.
+ */
+TEST(ns_multilevel_hierarchy)
+{
+	struct file_handle *gp_handle, *p_handle, *c_handle;
+	int ret, pipefd[2];
+	pid_t pid;
+	int status;
+	__u64 gp_id, p_id, c_id;
+	char gp_buf[sizeof(*gp_handle) + MAX_HANDLE_SZ];
+	char p_buf[sizeof(*p_handle) + MAX_HANDLE_SZ];
+	char c_buf[sizeof(*c_handle) + MAX_HANDLE_SZ];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		close(pipefd[0]);
+
+		/* Create grandparent user namespace and record its ID */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int gp_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (gp_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(gp_fd, NS_GET_ID, &gp_id) < 0) {
+			close(gp_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(gp_fd);
+
+		/* Create parent user namespace and record its ID */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int p_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (p_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(p_fd, NS_GET_ID, &p_id) < 0) {
+			close(p_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(p_fd);
+
+		/* Create child user namespace and record its ID */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int c_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (c_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(c_fd, NS_GET_ID, &c_id) < 0) {
+			close(c_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(c_fd);
+
+		/* Send all three namespace IDs, outermost first (gp, p, c) */
+		write(pipefd[1], &gp_id, sizeof(gp_id));
+		write(pipefd[1], &p_id, sizeof(p_id));
+		write(pipefd[1], &c_id, sizeof(c_id));
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	close(pipefd[1]);
+
+	/* Read the IDs in the order the child wrote them; skip on any short read */
+	ret = read(pipefd[0], &gp_id, sizeof(gp_id));
+	if (ret != sizeof(gp_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read grandparent namespace ID from child");
+	}
+
+	ret = read(pipefd[0], &p_id, sizeof(p_id));
+	if (ret != sizeof(p_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read parent namespace ID from child");
+	}
+
+	ret = read(pipefd[0], &c_id, sizeof(c_id));
+	close(pipefd[0]);
+	if (ret != sizeof(c_id)) {
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read child namespace ID from child");
+	}
+
+	/* Build nsfs handles keyed by ns_id only; ns_type and ns_inum are left 0 */
+	gp_handle = (struct file_handle *)gp_buf;
+	gp_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	gp_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *gp_fh = (struct nsfs_file_handle *)gp_handle->f_handle;
+	gp_fh->ns_id = gp_id;
+	gp_fh->ns_type = 0;
+	gp_fh->ns_inum = 0;
+
+	p_handle = (struct file_handle *)p_buf;
+	p_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	p_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *p_fh = (struct nsfs_file_handle *)p_handle->f_handle;
+	p_fh->ns_id = p_id;
+	p_fh->ns_type = 0;
+	p_fh->ns_inum = 0;
+
+	c_handle = (struct file_handle *)c_buf;
+	c_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	c_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *c_fh = (struct nsfs_file_handle *)c_handle->f_handle;
+	c_fh->ns_id = c_id;
+	c_fh->ns_type = 0;
+	c_fh->ns_inum = 0;
+
+	/* Open child ns by handle. NOTE(review): not synchronized with child exit */
+	int c_fd = open_by_handle_at(FD_NSFS_ROOT, c_handle, O_RDONLY);
+	if (c_fd < 0) {
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to open child namespace");
+	}
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/*
+	 * With 3-level hierarchy and child active:
+	 * - Child is active (we hold fd)
+	 * - Parent should be active (propagated from child)
+	 * - Grandparent should be active (propagated from parent)
+	 */
+	TH_LOG("Testing parent active when child is active");
+	int p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+	ASSERT_GE(p_fd, 0);
+
+	TH_LOG("Testing grandparent active when child is active");
+	int gp_fd = open_by_handle_at(FD_NSFS_ROOT, gp_handle, O_RDONLY);
+	ASSERT_GE(gp_fd, 0);
+
+	close(c_fd);
+	close(p_fd);
+	close(gp_fd);
+}
+
+/*
+ * Test multiple children sharing same parent.
+ * Parent should stay active as long as ANY child is active.
+ */
+TEST(ns_multiple_children_same_parent)
+{
+	struct file_handle *p_handle, *c1_handle, *c2_handle;
+	int ret, pipefd[2];
+	pid_t pid;
+	int status;
+	__u64 p_id, c1_id, c2_id;
+	char p_buf[sizeof(*p_handle) + MAX_HANDLE_SZ];
+	char c1_buf[sizeof(*c1_handle) + MAX_HANDLE_SZ];
+	char c2_buf[sizeof(*c2_handle) + MAX_HANDLE_SZ];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		close(pipefd[0]);
+
+		/* Create parent user namespace and record its ID */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int p_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (p_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(p_fd, NS_GET_ID, &p_id) < 0) {
+			close(p_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(p_fd);
+
+		/* Create first child user namespace and record its ID */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int c1_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (c1_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(c1_fd, NS_GET_ID, &c1_id) < 0) {
+			close(c1_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(c1_fd);
+
+		/* Ideally we would return to the parent userns for a second child. */
+		/* We can't do that easily, so unshare a network namespace instead.
+		 * NOTE(review): it is created while inside the first child userns. */
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int c2_fd = open("/proc/self/ns/net", O_RDONLY);
+		if (c2_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(c2_fd, NS_GET_ID, &c2_id) < 0) {
+			close(c2_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(c2_fd);
+
+		/* Send all namespace IDs in the order p, c1, c2 */
+		write(pipefd[1], &p_id, sizeof(p_id));
+		write(pipefd[1], &c1_id, sizeof(c1_id));
+		write(pipefd[1], &c2_id, sizeof(c2_id));
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	close(pipefd[1]);
+
+	/* Read the IDs in the order the child wrote them; skip on any short read */
+	ret = read(pipefd[0], &p_id, sizeof(p_id));
+	if (ret != sizeof(p_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read parent namespace ID");
+	}
+
+	ret = read(pipefd[0], &c1_id, sizeof(c1_id));
+	if (ret != sizeof(c1_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read first child namespace ID");
+	}
+
+	ret = read(pipefd[0], &c2_id, sizeof(c2_id));
+	close(pipefd[0]);
+	if (ret != sizeof(c2_id)) {
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read second child namespace ID");
+	}
+
+	/* Build nsfs handles keyed by ns_id only; ns_type and ns_inum are left 0 */
+	p_handle = (struct file_handle *)p_buf;
+	p_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	p_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *p_fh = (struct nsfs_file_handle *)p_handle->f_handle;
+	p_fh->ns_id = p_id;
+	p_fh->ns_type = 0;
+	p_fh->ns_inum = 0;
+
+	c1_handle = (struct file_handle *)c1_buf;
+	c1_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	c1_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *c1_fh = (struct nsfs_file_handle *)c1_handle->f_handle;
+	c1_fh->ns_id = c1_id;
+	c1_fh->ns_type = 0;
+	c1_fh->ns_inum = 0;
+
+	c2_handle = (struct file_handle *)c2_buf;
+	c2_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	c2_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *c2_fh = (struct nsfs_file_handle *)c2_handle->f_handle;
+	c2_fh->ns_id = c2_id;
+	c2_fh->ns_type = 0;
+	c2_fh->ns_inum = 0;
+
+	/* Open both by handle. NOTE(review): not synchronized with child exit */
+	int c1_fd = open_by_handle_at(FD_NSFS_ROOT, c1_handle, O_RDONLY);
+	int c2_fd = open_by_handle_at(FD_NSFS_ROOT, c2_handle, O_RDONLY);
+
+	if (c1_fd < 0 || c2_fd < 0) {
+		if (c1_fd >= 0) close(c1_fd);
+		if (c2_fd >= 0) close(c2_fd);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to open child namespaces");
+	}
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* Parent should be active (both children active) */
+	TH_LOG("Both children active - parent should be active");
+	int p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+	ASSERT_GE(p_fd, 0);
+	close(p_fd);
+
+	/* Close first child - parent should STILL be active */
+	TH_LOG("Closing first child - parent should still be active");
+	close(c1_fd);
+	p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+	ASSERT_GE(p_fd, 0);
+	close(p_fd);
+
+	/* Close second child - NOW parent should become inactive */
+	TH_LOG("Closing second child - parent should become inactive");
+	close(c2_fd);
+	p_fd = open_by_handle_at(FD_NSFS_ROOT, p_handle, O_RDONLY);
+	ASSERT_LT(p_fd, 0);
+}
+
+/*
+ * Test that different namespace types with same owner all contribute
+ * active references to the owning user namespace.
+ */
+TEST(ns_different_types_same_owner)
+{
+	struct file_handle *u_handle, *n_handle, *ut_handle;
+	int ret, pipefd[2];
+	pid_t pid;
+	int status;
+	__u64 u_id, n_id, ut_id;
+	char u_buf[sizeof(*u_handle) + MAX_HANDLE_SZ];
+	char n_buf[sizeof(*n_handle) + MAX_HANDLE_SZ];
+	char ut_buf[sizeof(*ut_handle) + MAX_HANDLE_SZ];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		close(pipefd[0]);
+
+		/* Create user namespace and record its ID */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int u_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (u_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(u_fd, NS_GET_ID, &u_id) < 0) {
+			close(u_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(u_fd);
+
+		/* Create network namespace (owned by user namespace) */
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int n_fd = open("/proc/self/ns/net", O_RDONLY);
+		if (n_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(n_fd, NS_GET_ID, &n_id) < 0) {
+			close(n_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(n_fd);
+
+		/* Create UTS namespace (also owned by user namespace) */
+		if (unshare(CLONE_NEWUTS) < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int ut_fd = open("/proc/self/ns/uts", O_RDONLY);
+		if (ut_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(ut_fd, NS_GET_ID, &ut_id) < 0) {
+			close(ut_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(ut_fd);
+
+		/* Send all namespace IDs in the order user, net, uts */
+		write(pipefd[1], &u_id, sizeof(u_id));
+		write(pipefd[1], &n_id, sizeof(n_id));
+		write(pipefd[1], &ut_id, sizeof(ut_id));
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	close(pipefd[1]);
+
+	/* Read the IDs in the order the child wrote them; skip on any short read */
+	ret = read(pipefd[0], &u_id, sizeof(u_id));
+	if (ret != sizeof(u_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read user namespace ID");
+	}
+
+	ret = read(pipefd[0], &n_id, sizeof(n_id));
+	if (ret != sizeof(n_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read network namespace ID");
+	}
+
+	ret = read(pipefd[0], &ut_id, sizeof(ut_id));
+	close(pipefd[0]);
+	if (ret != sizeof(ut_id)) {
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read UTS namespace ID");
+	}
+
+	/* Build nsfs handles keyed by ns_id only; ns_type and ns_inum are left 0 */
+	u_handle = (struct file_handle *)u_buf;
+	u_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	u_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *u_fh = (struct nsfs_file_handle *)u_handle->f_handle;
+	u_fh->ns_id = u_id;
+	u_fh->ns_type = 0;
+	u_fh->ns_inum = 0;
+
+	n_handle = (struct file_handle *)n_buf;
+	n_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	n_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *n_fh = (struct nsfs_file_handle *)n_handle->f_handle;
+	n_fh->ns_id = n_id;
+	n_fh->ns_type = 0;
+	n_fh->ns_inum = 0;
+
+	ut_handle = (struct file_handle *)ut_buf;
+	ut_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	ut_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *ut_fh = (struct nsfs_file_handle *)ut_handle->f_handle;
+	ut_fh->ns_id = ut_id;
+	ut_fh->ns_type = 0;
+	ut_fh->ns_inum = 0;
+
+	/* Open net and uts by handle. NOTE(review): not synchronized with child exit */
+	int n_fd = open_by_handle_at(FD_NSFS_ROOT, n_handle, O_RDONLY);
+	int ut_fd = open_by_handle_at(FD_NSFS_ROOT, ut_handle, O_RDONLY);
+
+	if (n_fd < 0 || ut_fd < 0) {
+		if (n_fd >= 0) close(n_fd);
+		if (ut_fd >= 0) close(ut_fd);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to open namespaces");
+	}
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/*
+	 * Both network and UTS namespaces are active.
+	 * User namespace should be active (gets 2 active refs).
+	 */
+	TH_LOG("Both net and uts active - user namespace should be active");
+	int u_fd = open_by_handle_at(FD_NSFS_ROOT, u_handle, O_RDONLY);
+	ASSERT_GE(u_fd, 0);
+	close(u_fd);
+
+	/* Close network namespace - user namespace should STILL be active */
+	TH_LOG("Closing network ns - user ns should still be active (uts still active)");
+	close(n_fd);
+	u_fd = open_by_handle_at(FD_NSFS_ROOT, u_handle, O_RDONLY);
+	ASSERT_GE(u_fd, 0);
+	close(u_fd);
+
+	/* Close UTS namespace - user namespace should become inactive */
+	TH_LOG("Closing uts ns - user ns should become inactive");
+	close(ut_fd);
+	u_fd = open_by_handle_at(FD_NSFS_ROOT, u_handle, O_RDONLY);
+	ASSERT_LT(u_fd, 0);
+}
+
+/*
+ * Test hierarchical propagation with deep namespace hierarchy.
+ * Create: init_user_ns -> user_A -> user_B -> net_ns
+ * When net_ns is active, both user_A and user_B should be active.
+ * This verifies the conditional recursion in __ns_ref_active_put() works.
+ */
+TEST(ns_deep_hierarchy_propagation)
+{
+	struct file_handle *ua_handle, *ub_handle, *net_handle;
+	int ret, pipefd[2];
+	pid_t pid;
+	int status;
+	__u64 ua_id, ub_id, net_id;
+	char ua_buf[sizeof(*ua_handle) + MAX_HANDLE_SZ];
+	char ub_buf[sizeof(*ub_handle) + MAX_HANDLE_SZ];
+	char net_buf[sizeof(*net_handle) + MAX_HANDLE_SZ];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		close(pipefd[0]);
+
+		/* Create user_A -> user_B -> net hierarchy, recording each ID in turn */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int ua_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (ua_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(ua_fd, NS_GET_ID, &ua_id) < 0) {
+			close(ua_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(ua_fd);
+
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int ub_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (ub_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(ub_fd, NS_GET_ID, &ub_id) < 0) {
+			close(ub_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(ub_fd);
+
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int net_fd = open("/proc/self/ns/net", O_RDONLY);
+		if (net_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(net_fd, NS_GET_ID, &net_id) < 0) {
+			close(net_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(net_fd);
+
+		/* Send all three namespace IDs in the order user_A, user_B, net */
+		write(pipefd[1], &ua_id, sizeof(ua_id));
+		write(pipefd[1], &ub_id, sizeof(ub_id));
+		write(pipefd[1], &net_id, sizeof(net_id));
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	close(pipefd[1]);
+
+	/* Read the IDs in the order the child wrote them; skip on any short read */
+	ret = read(pipefd[0], &ua_id, sizeof(ua_id));
+	if (ret != sizeof(ua_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read user_A namespace ID");
+	}
+
+	ret = read(pipefd[0], &ub_id, sizeof(ub_id));
+	if (ret != sizeof(ub_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read user_B namespace ID");
+	}
+
+	ret = read(pipefd[0], &net_id, sizeof(net_id));
+	close(pipefd[0]);
+	if (ret != sizeof(net_id)) {
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read network namespace ID");
+	}
+
+	/* Build nsfs handles keyed by ns_id only; ns_type and ns_inum are left 0 */
+	ua_handle = (struct file_handle *)ua_buf;
+	ua_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	ua_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *ua_fh = (struct nsfs_file_handle *)ua_handle->f_handle;
+	ua_fh->ns_id = ua_id;
+	ua_fh->ns_type = 0;
+	ua_fh->ns_inum = 0;
+
+	ub_handle = (struct file_handle *)ub_buf;
+	ub_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	ub_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *ub_fh = (struct nsfs_file_handle *)ub_handle->f_handle;
+	ub_fh->ns_id = ub_id;
+	ub_fh->ns_type = 0;
+	ub_fh->ns_inum = 0;
+
+	net_handle = (struct file_handle *)net_buf;
+	net_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	net_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *net_fh = (struct nsfs_file_handle *)net_handle->f_handle;
+	net_fh->ns_id = net_id;
+	net_fh->ns_type = 0;
+	net_fh->ns_inum = 0;
+
+	/* Open net_ns by handle. NOTE(review): not synchronized with child exit */
+	int net_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+	if (net_fd < 0) {
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to open network namespace");
+	}
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* With net_ns active, both user_A and user_B should be active */
+	TH_LOG("Testing user_B active (net_ns active causes propagation)");
+	int ub_fd = open_by_handle_at(FD_NSFS_ROOT, ub_handle, O_RDONLY);
+	ASSERT_GE(ub_fd, 0);
+
+	TH_LOG("Testing user_A active (propagated through user_B)");
+	int ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+	ASSERT_GE(ua_fd, 0);
+
+	/* Close net_ns - user_B should stay active (we hold direct ref) */
+	TH_LOG("Closing net_ns, user_B should remain active (direct ref held)");
+	close(net_fd);
+	int ub_fd2 = open_by_handle_at(FD_NSFS_ROOT, ub_handle, O_RDONLY);
+	ASSERT_GE(ub_fd2, 0);
+	close(ub_fd2);
+
+	/* Close user_B - user_A should stay active (we hold direct ref) */
+	TH_LOG("Closing user_B, user_A should remain active (direct ref held)");
+	close(ub_fd);
+	int ua_fd2 = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+	ASSERT_GE(ua_fd2, 0);
+	close(ua_fd2);
+
+	/* Close user_A - everything should become inactive */
+	TH_LOG("Closing user_A, all should become inactive");
+	close(ua_fd);
+
+	/* Only user_A is re-probed here; opening it by handle should now fail */
+	ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+	ASSERT_LT(ua_fd, 0);
+}
+
+/*
+ * Test that parent stays active as long as ANY child is active.
+ * Create parent user namespace with two child net namespaces.
+ * Parent should remain active until BOTH children are inactive.
+ */
+TEST(ns_parent_multiple_children_refcount)
+{
+	struct file_handle *parent_handle, *net1_handle, *net2_handle;
+	int ret, pipefd[2], syncpipe[2];
+	pid_t pid;
+	int status;
+	__u64 p_id, n1_id, n2_id;
+	char p_buf[sizeof(*parent_handle) + MAX_HANDLE_SZ];
+	char n1_buf[sizeof(*net1_handle) + MAX_HANDLE_SZ];
+	char n2_buf[sizeof(*net2_handle) + MAX_HANDLE_SZ];
+	char sync_byte;
+
+	ASSERT_EQ(pipe(pipefd), 0);
+	ASSERT_EQ(pipe(syncpipe), 0);
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		close(pipefd[0]);
+		close(syncpipe[1]);
+
+		/* Create parent user namespace */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int p_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (p_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(p_fd, NS_GET_ID, &p_id) < 0) {
+			close(p_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(p_fd);
+
+		/* Create first network namespace */
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(pipefd[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+
+		int n1_fd = open("/proc/self/ns/net", O_RDONLY);
+		if (n1_fd < 0) {
+			close(pipefd[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+		if (ioctl(n1_fd, NS_GET_ID, &n1_id) < 0) {
+			close(n1_fd);
+			close(pipefd[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+		/* Keep n1_fd open so first namespace stays active */
+
+		/* Create second network namespace */
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(n1_fd);
+			close(pipefd[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+
+		int n2_fd = open("/proc/self/ns/net", O_RDONLY);
+		if (n2_fd < 0) {
+			close(n1_fd);
+			close(pipefd[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+		if (ioctl(n2_fd, NS_GET_ID, &n2_id) < 0) {
+			close(n1_fd);
+			close(n2_fd);
+			close(pipefd[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+		/* Keep both n1_fd and n2_fd open */
+
+		/* Send all namespace IDs in the order p, n1, n2 */
+		write(pipefd[1], &p_id, sizeof(p_id));
+		write(pipefd[1], &n1_id, sizeof(n1_id));
+		write(pipefd[1], &n2_id, sizeof(n2_id));
+		close(pipefd[1]);
+
+		/* Block until the parent writes a byte or closes its end of syncpipe */
+		read(syncpipe[0], &sync_byte, 1);
+		close(syncpipe[0]);
+		exit(0);
+	}
+
+	close(pipefd[1]);
+	close(syncpipe[0]);
+
+	/* Read IDs in write order; on failure drop syncpipe so a waiting child exits */
+	ret = read(pipefd[0], &p_id, sizeof(p_id));
+	if (ret != sizeof(p_id)) {
+		close(pipefd[0]);
+		close(syncpipe[1]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read parent namespace ID");
+	}
+
+	ret = read(pipefd[0], &n1_id, sizeof(n1_id));
+	if (ret != sizeof(n1_id)) {
+		close(pipefd[0]);
+		close(syncpipe[1]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read first network namespace ID");
+	}
+
+	ret = read(pipefd[0], &n2_id, sizeof(n2_id));
+	close(pipefd[0]);
+	if (ret != sizeof(n2_id)) {
+		close(syncpipe[1]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read second network namespace ID");
+	}
+
+	/* Build nsfs handles keyed by ns_id only; ns_type and ns_inum are left 0 */
+	parent_handle = (struct file_handle *)p_buf;
+	parent_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	parent_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *p_fh = (struct nsfs_file_handle *)parent_handle->f_handle;
+	p_fh->ns_id = p_id;
+	p_fh->ns_type = 0;
+	p_fh->ns_inum = 0;
+
+	net1_handle = (struct file_handle *)n1_buf;
+	net1_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	net1_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *n1_fh = (struct nsfs_file_handle *)net1_handle->f_handle;
+	n1_fh->ns_id = n1_id;
+	n1_fh->ns_type = 0;
+	n1_fh->ns_inum = 0;
+
+	net2_handle = (struct file_handle *)n2_buf;
+	net2_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	net2_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *n2_fh = (struct nsfs_file_handle *)net2_handle->f_handle;
+	n2_fh->ns_id = n2_id;
+	n2_fh->ns_type = 0;
+	n2_fh->ns_inum = 0;
+
+	/* Open both net namespaces while child is still alive */
+	int n1_fd = open_by_handle_at(FD_NSFS_ROOT, net1_handle, O_RDONLY);
+	int n2_fd = open_by_handle_at(FD_NSFS_ROOT, net2_handle, O_RDONLY);
+
+	/* Release the child whether or not the opens worked; it only waits for this */
+	sync_byte = 'G';
+	write(syncpipe[1], &sync_byte, 1);
+	close(syncpipe[1]);
+	if (n1_fd < 0 || n2_fd < 0) {
+		if (n1_fd >= 0) close(n1_fd);
+		if (n2_fd >= 0) close(n2_fd);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to open net namespaces");
+	}
+
+	/* Wait for child to exit */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* Parent should be active (has 2 active children) */
+	TH_LOG("Both net namespaces active - parent should be active");
+	int p_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+	ASSERT_GE(p_fd, 0);
+	close(p_fd);
+
+	/* Close first net namespace - parent should STILL be active */
+	TH_LOG("Closing first net ns - parent should still be active");
+	close(n1_fd);
+	p_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+	ASSERT_GE(p_fd, 0);
+	close(p_fd);
+
+	/* Close second net namespace - parent should become inactive */
+	TH_LOG("Closing second net ns - parent should become inactive");
+	close(n2_fd);
+	p_fd = open_by_handle_at(FD_NSFS_ROOT, parent_handle, O_RDONLY);
+	ASSERT_LT(p_fd, 0);
+}
+
+/*
+ * Test that a user namespace acting as the child also propagates activity.
+ * A forked child creates user_A -> user_B; while an fd on user_B is held,
+ * user_A must be openable too. The child here is itself a user namespace.
+ */
+TEST(ns_userns_child_propagation)
+{
+	struct file_handle *ua_handle, *ub_handle;
+	int ret, pipefd[2];
+	pid_t pid;
+	int status;
+	__u64 ua_id, ub_id;
+	char ua_buf[sizeof(*ua_handle) + MAX_HANDLE_SZ];
+	char ub_buf[sizeof(*ub_handle) + MAX_HANDLE_SZ];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		close(pipefd[0]);
+
+		/* Create user_A */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int ua_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (ua_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(ua_fd, NS_GET_ID, &ua_id) < 0) {
+			close(ua_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(ua_fd);
+
+		/* Create user_B (child of user_A) */
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int ub_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (ub_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(ub_fd, NS_GET_ID, &ub_id) < 0) {
+			close(ub_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(ub_fd);
+
+		/* Send both namespace IDs, then exit without waiting for the parent */
+		write(pipefd[1], &ua_id, sizeof(ua_id));
+		write(pipefd[1], &ub_id, sizeof(ub_id));
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	close(pipefd[1]);
+
+	/* Read both namespace IDs - fixed size, no parsing needed */
+	ret = read(pipefd[0], &ua_id, sizeof(ua_id));
+	if (ret != sizeof(ua_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read user_A namespace ID");
+	}
+
+	ret = read(pipefd[0], &ub_id, sizeof(ub_id));
+	close(pipefd[0]);
+	if (ret != sizeof(ub_id)) {
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read user_B namespace ID");
+	}
+
+	/* Construct file handles from namespace IDs (ns_type/ns_inum left 0) */
+	ua_handle = (struct file_handle *)ua_buf;
+	ua_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	ua_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *ua_fh = (struct nsfs_file_handle *)ua_handle->f_handle;
+	ua_fh->ns_id = ua_id;
+	ua_fh->ns_type = 0;
+	ua_fh->ns_inum = 0;
+
+	ub_handle = (struct file_handle *)ub_buf;
+	ub_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	ub_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *ub_fh = (struct nsfs_file_handle *)ub_handle->f_handle;
+	ub_fh->ns_id = ub_id;
+	ub_fh->ns_type = 0;
+	ub_fh->ns_inum = 0;
+
+	/* Try to open user_B; the child does not wait, so skip if this fails */
+	int ub_fd = open_by_handle_at(FD_NSFS_ROOT, ub_handle, O_RDONLY);
+	if (ub_fd < 0) {
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to open user_B");
+	}
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* With user_B active, user_A should also be active */
+	TH_LOG("Testing user_A active when child user_B is active");
+	int ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+	ASSERT_GE(ua_fd, 0);
+
+	/* Close user_B */
+	TH_LOG("Closing user_B");
+	close(ub_fd);
+
+	/* user_A should remain active (ua_fd is still open) */
+	int ua_fd2 = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+	ASSERT_GE(ua_fd2, 0);
+	close(ua_fd2);
+
+	/* Close user_A - should become inactive */
+	TH_LOG("Closing user_A - should become inactive");
+	close(ua_fd);
+
+	ua_fd = open_by_handle_at(FD_NSFS_ROOT, ua_handle, O_RDONLY);
+	ASSERT_LT(ua_fd, 0);
+}
+
+/*
+ * Test different namespace types (net and uts) both contributing
+ * active references to the same owning user namespace.
+ */
+TEST(ns_mixed_types_same_owner)
+{
+	struct file_handle *user_handle, *net_handle, *uts_handle;
+	int ret, pipefd[2];
+	pid_t pid;
+	int status;
+	__u64 u_id, n_id, ut_id;
+	char u_buf[sizeof(*user_handle) + MAX_HANDLE_SZ];
+	char n_buf[sizeof(*net_handle) + MAX_HANDLE_SZ];
+	char ut_buf[sizeof(*uts_handle) + MAX_HANDLE_SZ];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		close(pipefd[0]);
+
+		if (setup_userns() < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int u_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (u_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(u_fd, NS_GET_ID, &u_id) < 0) {
+			close(u_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(u_fd);
+
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int n_fd = open("/proc/self/ns/net", O_RDONLY);
+		if (n_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(n_fd, NS_GET_ID, &n_id) < 0) {
+			close(n_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(n_fd);
+
+		if (unshare(CLONE_NEWUTS) < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+
+		int ut_fd = open("/proc/self/ns/uts", O_RDONLY);
+		if (ut_fd < 0) {
+			close(pipefd[1]);
+			exit(1);
+		}
+		if (ioctl(ut_fd, NS_GET_ID, &ut_id) < 0) {
+			close(ut_fd);
+			close(pipefd[1]);
+			exit(1);
+		}
+		close(ut_fd);
+
+		/* Send all namespace IDs, then exit without waiting for the parent */
+		write(pipefd[1], &u_id, sizeof(u_id));
+		write(pipefd[1], &n_id, sizeof(n_id));
+		write(pipefd[1], &ut_id, sizeof(ut_id));
+		close(pipefd[1]);
+		exit(0);
+	}
+
+	close(pipefd[1]);
+
+	/* Read all three namespace IDs - fixed size, no parsing needed */
+	ret = read(pipefd[0], &u_id, sizeof(u_id));
+	if (ret != sizeof(u_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read user namespace ID");
+	}
+
+	ret = read(pipefd[0], &n_id, sizeof(n_id));
+	if (ret != sizeof(n_id)) {
+		close(pipefd[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read network namespace ID");
+	}
+
+	ret = read(pipefd[0], &ut_id, sizeof(ut_id));
+	close(pipefd[0]);
+	if (ret != sizeof(ut_id)) {
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read UTS namespace ID");
+	}
+
+	/* Construct file handles from namespace IDs (ns_type/ns_inum left 0) */
+	user_handle = (struct file_handle *)u_buf;
+	user_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	user_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *u_fh = (struct nsfs_file_handle *)user_handle->f_handle;
+	u_fh->ns_id = u_id;
+	u_fh->ns_type = 0;
+	u_fh->ns_inum = 0;
+
+	net_handle = (struct file_handle *)n_buf;
+	net_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	net_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *n_fh = (struct nsfs_file_handle *)net_handle->f_handle;
+	n_fh->ns_id = n_id;
+	n_fh->ns_type = 0;
+	n_fh->ns_inum = 0;
+
+	uts_handle = (struct file_handle *)ut_buf;
+	uts_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	uts_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *ut_fh = (struct nsfs_file_handle *)uts_handle->f_handle;
+	ut_fh->ns_id = ut_id;
+	ut_fh->ns_type = 0;
+	ut_fh->ns_inum = 0;
+
+	/* Open both non-user namespaces; skip if either open fails */
+	int n_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+	int ut_fd = open_by_handle_at(FD_NSFS_ROOT, uts_handle, O_RDONLY);
+	if (n_fd < 0 || ut_fd < 0) {
+		if (n_fd >= 0) close(n_fd);
+		if (ut_fd >= 0) close(ut_fd);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to open namespaces");
+	}
+
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* User namespace should be active (net and uts fds are both held) */
+	TH_LOG("Both net and uts active - user ns should be active");
+	int u_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+	ASSERT_GE(u_fd, 0);
+	close(u_fd);
+
+	/* Close net - user ns should STILL be active (uts still active) */
+	TH_LOG("Closing net - user ns should still be active");
+	close(n_fd);
+	u_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+	ASSERT_GE(u_fd, 0);
+	close(u_fd);
+
+	/* Close uts - user ns should become inactive */
+	TH_LOG("Closing uts - user ns should become inactive");
+	close(ut_fd);
+	u_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+	ASSERT_LT(u_fd, 0);
+}
+
+/* Thread test helpers and structures */
+struct thread_ns_info {
+	__u64 ns_id;		/* out: ID the thread obtains via NS_GET_ID */
+	int pipefd;		/* write end used to send ns_id to the main thread */
+	int syncfd_read;	/* read end the thread blocks on until told to exit */
+	int syncfd_write;	/* set to -1 by the tests here; not used by the thread */
+	int exit_code;		/* 0 on success, 1-5 identify the step that failed */
+};
+
+static void *thread_create_namespace(void *arg)
+{
+	struct thread_ns_info *info = (struct thread_ns_info *)arg;
+	char sync_byte = 0;
+	int fd, ret;
+
+	/*
+	 * Thread body: unshare a net namespace, report its ID on info->pipefd,
+	 * then block on info->syncfd_read. info->exit_code names the failed step.
+	 */
+	info->exit_code = 1;
+	if (unshare(CLONE_NEWNET) < 0)
+		goto fail;
+
+	info->exit_code = 2;
+	fd = open("/proc/thread-self/ns/net", O_RDONLY);
+	if (fd < 0)
+		goto fail;
+
+	info->exit_code = 3;
+	ret = ioctl(fd, NS_GET_ID, &info->ns_id);
+	close(fd);
+	if (ret < 0)
+		goto fail;
+
+	/* Send namespace ID to main thread */
+	info->exit_code = 4;
+	if (write(info->pipefd, &info->ns_id, sizeof(info->ns_id)) != sizeof(info->ns_id))
+		goto fail;
+
+	/* Wait for signal to exit */
+	info->exit_code = 5;
+	if (read(info->syncfd_read, &sync_byte, 1) != 1)
+		goto fail;
+
+	info->exit_code = 0;
+	return NULL;
+fail:
+	/* Short write: the main thread's blocking read() returns and it skips */
+	(void)write(info->pipefd, &sync_byte, 1);
+	return NULL;
+}
+
+/*
+ * Test that namespace becomes inactive after thread exits.
+ * This verifies active reference counting works with threads, not just processes.
+ */
+TEST(thread_ns_inactive_after_exit)
+{
+	pthread_t thread;
+	struct thread_ns_info info;
+	struct file_handle *handle;
+	int pipefd[2];
+	int syncpipe[2];
+	int ret;
+	char sync_byte;
+	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+	ASSERT_EQ(pipe(syncpipe), 0);
+
+	info.pipefd = pipefd[1];
+	info.syncfd_read = syncpipe[0];
+	info.syncfd_write = -1;
+	info.exit_code = -1;
+
+	/* Create thread that will create a namespace */
+	ret = pthread_create(&thread, NULL, thread_create_namespace, &info);
+	ASSERT_EQ(ret, 0);
+
+	/* Read namespace ID from thread (blocks until the thread writes it) */
+	__u64 ns_id;
+	ret = read(pipefd[0], &ns_id, sizeof(ns_id));
+	if (ret != sizeof(ns_id)) {
+		sync_byte = 'X';
+		write(syncpipe[1], &sync_byte, 1);
+		pthread_join(thread, NULL);
+		close(pipefd[0]);
+		close(pipefd[1]);
+		close(syncpipe[0]);
+		close(syncpipe[1]);
+		SKIP(return, "Failed to read namespace ID from thread");
+	}
+
+	TH_LOG("Thread created namespace with ID %llu", (unsigned long long)ns_id);
+
+	/* Construct file handle from the ID (ns_type/ns_inum left 0) */
+	handle = (struct file_handle *)buf;
+	handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *fh = (struct nsfs_file_handle *)handle->f_handle;
+	fh->ns_id = ns_id;
+	fh->ns_type = 0;
+	fh->ns_inum = 0;
+
+	/* Namespace should be active while thread is alive */
+	TH_LOG("Attempting to open namespace while thread is alive (should succeed)");
+	int nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_GE(nsfd, 0);
+	close(nsfd);
+
+	/* Signal thread to exit */
+	TH_LOG("Signaling thread to exit");
+	sync_byte = 'X';
+	ASSERT_EQ(write(syncpipe[1], &sync_byte, 1), 1);
+	close(syncpipe[1]);
+
+	/* Wait for thread to exit, then drop the remaining pipe fds */
+	ASSERT_EQ(pthread_join(thread, NULL), 0);
+	close(pipefd[0]);
+	close(pipefd[1]);
+	close(syncpipe[0]);
+
+	if (info.exit_code != 0)
+		SKIP(return, "Thread failed to create namespace");
+
+	TH_LOG("Thread exited, namespace should be inactive");
+
+	/* Namespace should now be inactive (no fd to it is held here) */
+	nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_LT(nsfd, 0);
+	/* Should fail with ENOENT (inactive) or ESTALE (gone) */
+	TH_LOG("Namespace inactive as expected: %s (errno=%d)", strerror(errno), errno);
+	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/*
+ * Test that a namespace remains active while an fd to it is held.
+ * Even after the creating thread exits, the namespace should stay active as
+ * long as the main thread keeps its file descriptor open.
+ */
+TEST(thread_ns_fd_keeps_active)
+{
+	pthread_t thread;
+	struct thread_ns_info info;
+	struct file_handle *handle;
+	int pipefd[2];
+	int syncpipe[2];
+	int ret;
+	char sync_byte;
+	char buf[sizeof(*handle) + MAX_HANDLE_SZ];
+
+	ASSERT_EQ(pipe(pipefd), 0);
+	ASSERT_EQ(pipe(syncpipe), 0);
+
+	info.pipefd = pipefd[1];
+	info.syncfd_read = syncpipe[0];
+	info.syncfd_write = -1;
+	info.exit_code = -1;
+
+	/* Create thread that will create a namespace */
+	ret = pthread_create(&thread, NULL, thread_create_namespace, &info);
+	ASSERT_EQ(ret, 0);
+
+	/* Read namespace ID from thread (blocks until the thread writes it) */
+	__u64 ns_id;
+	ret = read(pipefd[0], &ns_id, sizeof(ns_id));
+	if (ret != sizeof(ns_id)) {
+		sync_byte = 'X';
+		write(syncpipe[1], &sync_byte, 1);
+		pthread_join(thread, NULL);
+		close(pipefd[0]);
+		close(pipefd[1]);
+		close(syncpipe[0]);
+		close(syncpipe[1]);
+		SKIP(return, "Failed to read namespace ID from thread");
+	}
+
+	TH_LOG("Thread created namespace with ID %llu", (unsigned long long)ns_id);
+
+	/* Construct file handle from the ID (ns_type/ns_inum left 0) */
+	handle = (struct file_handle *)buf;
+	handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *fh = (struct nsfs_file_handle *)handle->f_handle;
+	fh->ns_id = ns_id;
+	fh->ns_type = 0;
+	fh->ns_inum = 0;
+
+	/* Open namespace while thread is alive; nsfd is kept open on purpose */
+	TH_LOG("Opening namespace while thread is alive");
+	int nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_GE(nsfd, 0);
+
+	/* Signal thread to exit (checked, as in thread_ns_inactive_after_exit) */
+	TH_LOG("Signaling thread to exit");
+	sync_byte = 'X';
+	ASSERT_EQ(write(syncpipe[1], &sync_byte, 1), 1);
+	close(syncpipe[1]);
+
+	/* Wait for thread to exit, then drop the remaining pipe fds */
+	ASSERT_EQ(pthread_join(thread, NULL), 0);
+	close(pipefd[0]);
+	close(pipefd[1]);
+	close(syncpipe[0]);
+
+	if (info.exit_code != 0) {
+		close(nsfd);
+		SKIP(return, "Thread failed to create namespace");
+	}
+
+	TH_LOG("Thread exited, but main thread holds fd - namespace should remain active");
+
+	/* Namespace should still be active because we hold an fd */
+	int nsfd2 = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_GE(nsfd2, 0);
+
+	/* Verify it's the same namespace (both fds report the same inode) */
+	struct stat st1, st2;
+	ASSERT_EQ(fstat(nsfd, &st1), 0);
+	ASSERT_EQ(fstat(nsfd2, &st2), 0);
+	ASSERT_EQ(st1.st_ino, st2.st_ino);
+	close(nsfd2);
+
+	TH_LOG("Closing fd - namespace should become inactive");
+	close(nsfd);
+
+	/* Now namespace should be inactive */
+	nsfd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_LT(nsfd, 0);
+	/* Should fail with ENOENT (inactive) or ESTALE (gone) */
+	TH_LOG("Namespace inactive as expected: %s (errno=%d)", strerror(errno), errno);
+	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+}
+
+/* Argument handed to each helper thread started inside the subprocess */
+struct thread_sleep_data {
+	int syncfd_read;
+};
+
+static void *thread_sleep_and_wait(void *arg)
+{
+	struct thread_sleep_data *ctx = arg;
+	char token;
+
+	/* Park until a byte arrives on the sync fd or its peer end is closed */
+	(void)read(ctx->syncfd_read, &token, 1);
+	return NULL;
+}
+
+/*
+ * Test that namespaces become inactive after subprocess with multiple threads exits.
+ * Create a subprocess that unshares user and network namespaces, then creates two
+ * threads that share those namespaces. Verify that after all threads and subprocess
+ * exit, the namespaces are no longer listed by listns() and cannot be opened by
+ * open_by_handle_at().
+ */
+TEST(thread_subprocess_ns_inactive_after_all_exit)
+{
+	int pipefd[2];
+	int sv[2];
+	pid_t pid;
+	int status;
+	__u64 user_id, net_id;
+	struct file_handle *user_handle, *net_handle;
+	char user_buf[sizeof(*user_handle) + MAX_HANDLE_SZ];
+	char net_buf[sizeof(*net_handle) + MAX_HANDLE_SZ];
+	char sync_byte;
+	int ret;
+
+	ASSERT_EQ(pipe(pipefd), 0);
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child process: keeps pipefd[1] (IDs out) and sv[1] (sync in) */
+		close(pipefd[0]);
+		close(sv[0]);
+
+		/* Create user namespace with mappings */
+		if (setup_userns() < 0) {
+			fprintf(stderr, "Child: setup_userns() failed: %s\n", strerror(errno));
+			close(pipefd[1]);
+			close(sv[1]);
+			exit(1);
+		}
+		fprintf(stderr, "Child: setup_userns() succeeded\n");
+
+		/* Get user namespace ID */
+		int user_fd = open("/proc/self/ns/user", O_RDONLY);
+		if (user_fd < 0) {
+			fprintf(stderr, "Child: open(/proc/self/ns/user) failed: %s\n", strerror(errno));
+			close(pipefd[1]);
+			close(sv[1]);
+			exit(1);
+		}
+
+		if (ioctl(user_fd, NS_GET_ID, &user_id) < 0) {
+			fprintf(stderr, "Child: ioctl(NS_GET_ID) for user ns failed: %s\n", strerror(errno));
+			close(user_fd);
+			close(pipefd[1]);
+			close(sv[1]);
+			exit(1);
+		}
+		close(user_fd);
+		fprintf(stderr, "Child: user ns ID = %llu\n", (unsigned long long)user_id);
+
+		/* Unshare network namespace */
+		if (unshare(CLONE_NEWNET) < 0) {
+			fprintf(stderr, "Child: unshare(CLONE_NEWNET) failed: %s\n", strerror(errno));
+			close(pipefd[1]);
+			close(sv[1]);
+			exit(1);
+		}
+		fprintf(stderr, "Child: unshare(CLONE_NEWNET) succeeded\n");
+
+		/* Get network namespace ID */
+		int net_fd = open("/proc/self/ns/net", O_RDONLY);
+		if (net_fd < 0) {
+			fprintf(stderr, "Child: open(/proc/self/ns/net) failed: %s\n", strerror(errno));
+			close(pipefd[1]);
+			close(sv[1]);
+			exit(1);
+		}
+
+		if (ioctl(net_fd, NS_GET_ID, &net_id) < 0) {
+			fprintf(stderr, "Child: ioctl(NS_GET_ID) for net ns failed: %s\n", strerror(errno));
+			close(net_fd);
+			close(pipefd[1]);
+			close(sv[1]);
+			exit(1);
+		}
+		close(net_fd);
+		fprintf(stderr, "Child: net ns ID = %llu\n", (unsigned long long)net_id);
+
+		/* Send namespace IDs to parent */
+		if (write(pipefd[1], &user_id, sizeof(user_id)) != sizeof(user_id)) {
+			fprintf(stderr, "Child: write(user_id) failed: %s\n", strerror(errno));
+			exit(1);
+		}
+		if (write(pipefd[1], &net_id, sizeof(net_id)) != sizeof(net_id)) {
+			fprintf(stderr, "Child: write(net_id) failed: %s\n", strerror(errno));
+			exit(1);
+		}
+		close(pipefd[1]);
+		fprintf(stderr, "Child: sent namespace IDs to parent\n");
+
+		/* Create two threads that share the namespaces; both read from sv[1] */
+		pthread_t thread1, thread2;
+		struct thread_sleep_data data;
+		data.syncfd_read = sv[1];
+
+		int ret_thread = pthread_create(&thread1, NULL, thread_sleep_and_wait, &data);
+		if (ret_thread != 0) {
+			fprintf(stderr, "Child: pthread_create(thread1) failed: %s\n", strerror(ret_thread));
+			close(sv[1]);
+			exit(1);
+		}
+		fprintf(stderr, "Child: created thread1\n");
+
+		ret_thread = pthread_create(&thread2, NULL, thread_sleep_and_wait, &data);
+		if (ret_thread != 0) {
+			fprintf(stderr, "Child: pthread_create(thread2) failed: %s\n", strerror(ret_thread));
+			close(sv[1]);
+			pthread_cancel(thread1);
+			exit(1);
+		}
+		fprintf(stderr, "Child: created thread2\n");
+
+		/* Wait for threads - each returns once its read() on sv[1] completes */
+		fprintf(stderr, "Child: waiting for threads to exit\n");
+		pthread_join(thread1, NULL);
+		fprintf(stderr, "Child: thread1 exited\n");
+		pthread_join(thread2, NULL);
+		fprintf(stderr, "Child: thread2 exited\n");
+
+		close(sv[1]);
+
+		/* Exit - namespaces should become inactive */
+		fprintf(stderr, "Child: all threads joined, exiting with success\n");
+		exit(0);
+	}
+
+	/* Parent process: keeps pipefd[0] (IDs in) and sv[0] (sync out) */
+	close(pipefd[1]);
+	close(sv[1]);
+
+	TH_LOG("Parent: waiting to read namespace IDs from child");
+
+	/* Read namespace IDs from child; on a short read release the child and skip */
+	ret = read(pipefd[0], &user_id, sizeof(user_id));
+	if (ret != sizeof(user_id)) {
+		TH_LOG("Parent: failed to read user_id, ret=%d, errno=%s", ret, strerror(errno));
+		close(pipefd[0]);
+		sync_byte = 'X';
+		(void)write(sv[0], &sync_byte, 1);
+		close(sv[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read user namespace ID from child");
+	}
+
+	ret = read(pipefd[0], &net_id, sizeof(net_id));
+	close(pipefd[0]);
+	if (ret != sizeof(net_id)) {
+		TH_LOG("Parent: failed to read net_id, ret=%d, errno=%s", ret, strerror(errno));
+		sync_byte = 'X';
+		(void)write(sv[0], &sync_byte, 1);
+		close(sv[0]);
+		waitpid(pid, NULL, 0);
+		SKIP(return, "Failed to read network namespace ID from child");
+	}
+
+	TH_LOG("Child created user ns %llu and net ns %llu with 2 threads",
+	       (unsigned long long)user_id, (unsigned long long)net_id);
+
+	/* Construct file handles from the IDs (ns_type/ns_inum left 0) */
+	user_handle = (struct file_handle *)user_buf;
+	user_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	user_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *user_fh = (struct nsfs_file_handle *)user_handle->f_handle;
+	user_fh->ns_id = user_id;
+	user_fh->ns_type = 0;
+	user_fh->ns_inum = 0;
+
+	net_handle = (struct file_handle *)net_buf;
+	net_handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	net_handle->handle_type = FILEID_NSFS;
+	struct nsfs_file_handle *net_fh = (struct nsfs_file_handle *)net_handle->f_handle;
+	net_fh->ns_id = net_id;
+	net_fh->ns_type = 0;
+	net_fh->ns_inum = 0;
+
+	/* Verify namespaces are active while subprocess and threads are alive */
+	TH_LOG("Verifying namespaces are active while subprocess with threads is running");
+	int user_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+	ASSERT_GE(user_fd, 0);
+
+	int net_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+	ASSERT_GE(net_fd, 0);
+
+	close(user_fd);
+	close(net_fd);
+
+	/* Also verify they appear in listns() (a listns() error is tolerated) */
+	TH_LOG("Verifying namespaces appear in listns() while active");
+	struct ns_id_req req = {
+		.size = sizeof(struct ns_id_req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[256];
+	int nr_ids = sys_listns(&req, ns_ids, 256, 0);
+	if (nr_ids < 0) {
+		TH_LOG("listns() not available, skipping listns verification");
+	} else {
+		/* Check if user_id is in the list */
+		int found_user = 0;
+		for (int i = 0; i < nr_ids; i++) {
+			if (ns_ids[i] == user_id) {
+				found_user = 1;
+				break;
+			}
+		}
+		ASSERT_TRUE(found_user);
+		TH_LOG("User namespace found in listns() as expected");
+
+		/* Check network namespace */
+		req.ns_type = CLONE_NEWNET;
+		nr_ids = sys_listns(&req, ns_ids, 256, 0);
+		if (nr_ids >= 0) {
+			int found_net = 0;
+			for (int i = 0; i < nr_ids; i++) {
+				if (ns_ids[i] == net_id) {
+					found_net = 1;
+					break;
+				}
+			}
+			ASSERT_TRUE(found_net);
+			TH_LOG("Network namespace found in listns() as expected");
+		}
+	}
+
+	/* Signal threads to exit */
+	TH_LOG("Signaling threads to exit");
+	sync_byte = 'X';
+	/* Write two bytes - one for each thread */
+	ASSERT_EQ(write(sv[0], &sync_byte, 1), 1);
+	ASSERT_EQ(write(sv[0], &sync_byte, 1), 1);
+	close(sv[0]);
+
+	/* Wait for child process to exit; a non-zero exit status skips the test */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	if (WEXITSTATUS(status) != 0) {
+		TH_LOG("Child process failed with exit code %d", WEXITSTATUS(status));
+		SKIP(return, "Child process failed");
+	}
+
+	TH_LOG("Subprocess and all threads have exited successfully");
+
+	/* Verify namespaces are now inactive - open_by_handle_at should fail */
+	TH_LOG("Verifying namespaces are inactive after subprocess and threads exit");
+	user_fd = open_by_handle_at(FD_NSFS_ROOT, user_handle, O_RDONLY);
+	ASSERT_LT(user_fd, 0);
+	TH_LOG("User namespace inactive as expected: %s (errno=%d)",
+	       strerror(errno), errno);
+	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+
+	net_fd = open_by_handle_at(FD_NSFS_ROOT, net_handle, O_RDONLY);
+	ASSERT_LT(net_fd, 0);
+	TH_LOG("Network namespace inactive as expected: %s (errno=%d)",
+	       strerror(errno), errno);
+	ASSERT_TRUE(errno == ENOENT || errno == ESTALE);
+
+	/* Verify namespaces do NOT appear in listns() */
+	TH_LOG("Verifying namespaces do NOT appear in listns() when inactive");
+	memset(&req, 0, sizeof(req));
+	req.size = sizeof(struct ns_id_req);
+	req.ns_type = CLONE_NEWUSER;
+	nr_ids = sys_listns(&req, ns_ids, 256, 0);
+	if (nr_ids >= 0) {
+		int found_user = 0;
+		for (int i = 0; i < nr_ids; i++) {
+			if (ns_ids[i] == user_id) {
+				found_user = 1;
+				break;
+			}
+		}
+		ASSERT_FALSE(found_user);
+		TH_LOG("User namespace correctly not listed in listns()");
+
+		/* Check network namespace */
+		req.ns_type = CLONE_NEWNET;
+		nr_ids = sys_listns(&req, ns_ids, 256, 0);
+		if (nr_ids >= 0) {
+			int found_net = 0;
+			for (int i = 0; i < nr_ids; i++) {
+				if (ns_ids[i] == net_id) {
+					found_net = 1;
+					break;
+				}
+			}
+			ASSERT_FALSE(found_net);
+			TH_LOG("Network namespace correctly not listed in listns()");
+		}
+	}
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/nsid_test.c b/tools/testing/selftests/namespaces/nsid_test.c
index e28accd74a57..527ade0a8673 100644
--- a/tools/testing/selftests/namespaces/nsid_test.c
+++ b/tools/testing/selftests/namespaces/nsid_test.c
@@ -6,6 +6,7 @@
 #include <libgen.h>
 #include <limits.h>
 #include <pthread.h>
+#include <signal.h>
 #include <string.h>
 #include <sys/mount.h>
 #include <poll.h>
@@ -14,12 +15,30 @@
 #include <sys/stat.h>
 #include <sys/socket.h>
 #include <sys/un.h>
+#include <sys/wait.h>
 #include <unistd.h>
 #include <linux/fs.h>
 #include <linux/limits.h>
 #include <linux/nsfs.h>
 #include "../kselftest_harness.h"
 
+/* Fixture for tests that fork a child: records its pid so teardown can reap it */
+FIXTURE(nsid) {
+	pid_t child_pid;	/* 0 until a test stores the forked child's pid */
+};
+
+FIXTURE_SETUP(nsid) {
+	self->child_pid = 0;
+}
+
+FIXTURE_TEARDOWN(nsid) {
+	/* SIGKILL and reap the recorded child, if any, that may still be running */
+	if (self->child_pid > 0) {
+		kill(self->child_pid, SIGKILL);
+		waitpid(self->child_pid, NULL, 0);
+	}
+}
+
 TEST(nsid_mntns_basic)
 {
 	__u64 mnt_ns_id = 0;
@@ -44,7 +63,7 @@ TEST(nsid_mntns_basic)
 	close(fd_mntns);
 }
 
-TEST(nsid_mntns_separate)
+TEST_F(nsid, mntns_separate)
 {
 	__u64 parent_mnt_ns_id = 0;
 	__u64 child_mnt_ns_id = 0;
@@ -90,6 +109,9 @@ TEST(nsid_mntns_separate)
 		_exit(0);
 	}
 
+	/* Track child for cleanup */
+	self->child_pid = pid;
+
 	/* Parent process */
 	close(pipefd[1]);
 
@@ -99,8 +121,6 @@ TEST(nsid_mntns_separate)
 
 	if (buf == 'S') {
 		/* Child couldn't create namespace, skip test */
-		kill(pid, SIGTERM);
-		waitpid(pid, NULL, 0);
 		close(fd_parent_mntns);
 		SKIP(return, "No permission to create mount namespace");
 	}
@@ -123,10 +143,6 @@ TEST(nsid_mntns_separate)
 
 	close(fd_parent_mntns);
 	close(fd_child_mntns);
-
-	/* Clean up child process */
-	kill(pid, SIGTERM);
-	waitpid(pid, NULL, 0);
 }
 
 TEST(nsid_cgroupns_basic)
@@ -153,7 +169,7 @@ TEST(nsid_cgroupns_basic)
 	close(fd_cgroupns);
 }
 
-TEST(nsid_cgroupns_separate)
+TEST_F(nsid, cgroupns_separate)
 {
 	__u64 parent_cgroup_ns_id = 0;
 	__u64 child_cgroup_ns_id = 0;
@@ -199,6 +215,9 @@ TEST(nsid_cgroupns_separate)
 		_exit(0);
 	}
 
+	/* Track child for cleanup */
+	self->child_pid = pid;
+
 	/* Parent process */
 	close(pipefd[1]);
 
@@ -208,8 +227,6 @@ TEST(nsid_cgroupns_separate)
 
 	if (buf == 'S') {
 		/* Child couldn't create namespace, skip test */
-		kill(pid, SIGTERM);
-		waitpid(pid, NULL, 0);
 		close(fd_parent_cgroupns);
 		SKIP(return, "No permission to create cgroup namespace");
 	}
@@ -232,10 +249,6 @@ TEST(nsid_cgroupns_separate)
 
 	close(fd_parent_cgroupns);
 	close(fd_child_cgroupns);
-
-	/* Clean up child process */
-	kill(pid, SIGTERM);
-	waitpid(pid, NULL, 0);
 }
 
 TEST(nsid_ipcns_basic)
@@ -262,7 +275,7 @@ TEST(nsid_ipcns_basic)
 	close(fd_ipcns);
 }
 
-TEST(nsid_ipcns_separate)
+TEST_F(nsid, ipcns_separate)
 {
 	__u64 parent_ipc_ns_id = 0;
 	__u64 child_ipc_ns_id = 0;
@@ -308,6 +321,9 @@ TEST(nsid_ipcns_separate)
 		_exit(0);
 	}
 
+	/* Track child for cleanup */
+	self->child_pid = pid;
+
 	/* Parent process */
 	close(pipefd[1]);
 
@@ -317,8 +333,6 @@ TEST(nsid_ipcns_separate)
 
 	if (buf == 'S') {
 		/* Child couldn't create namespace, skip test */
-		kill(pid, SIGTERM);
-		waitpid(pid, NULL, 0);
 		close(fd_parent_ipcns);
 		SKIP(return, "No permission to create IPC namespace");
 	}
@@ -341,10 +355,6 @@ TEST(nsid_ipcns_separate)
 
 	close(fd_parent_ipcns);
 	close(fd_child_ipcns);
-
-	/* Clean up child process */
-	kill(pid, SIGTERM);
-	waitpid(pid, NULL, 0);
 }
 
 TEST(nsid_utsns_basic)
@@ -371,7 +381,7 @@ TEST(nsid_utsns_basic)
 	close(fd_utsns);
 }
 
-TEST(nsid_utsns_separate)
+TEST_F(nsid, utsns_separate)
 {
 	__u64 parent_uts_ns_id = 0;
 	__u64 child_uts_ns_id = 0;
@@ -417,6 +427,9 @@ TEST(nsid_utsns_separate)
 		_exit(0);
 	}
 
+	/* Track child for cleanup */
+	self->child_pid = pid;
+
 	/* Parent process */
 	close(pipefd[1]);
 
@@ -426,8 +439,6 @@ TEST(nsid_utsns_separate)
 
 	if (buf == 'S') {
 		/* Child couldn't create namespace, skip test */
-		kill(pid, SIGTERM);
-		waitpid(pid, NULL, 0);
 		close(fd_parent_utsns);
 		SKIP(return, "No permission to create UTS namespace");
 	}
@@ -450,10 +461,6 @@ TEST(nsid_utsns_separate)
 
 	close(fd_parent_utsns);
 	close(fd_child_utsns);
-
-	/* Clean up child process */
-	kill(pid, SIGTERM);
-	waitpid(pid, NULL, 0);
 }
 
 TEST(nsid_userns_basic)
@@ -480,7 +487,7 @@ TEST(nsid_userns_basic)
 	close(fd_userns);
 }
 
-TEST(nsid_userns_separate)
+TEST_F(nsid, userns_separate)
 {
 	__u64 parent_user_ns_id = 0;
 	__u64 child_user_ns_id = 0;
@@ -526,6 +533,9 @@ TEST(nsid_userns_separate)
 		_exit(0);
 	}
 
+	/* Track child for cleanup */
+	self->child_pid = pid;
+
 	/* Parent process */
 	close(pipefd[1]);
 
@@ -535,8 +545,6 @@ TEST(nsid_userns_separate)
 
 	if (buf == 'S') {
 		/* Child couldn't create namespace, skip test */
-		kill(pid, SIGTERM);
-		waitpid(pid, NULL, 0);
 		close(fd_parent_userns);
 		SKIP(return, "No permission to create user namespace");
 	}
@@ -559,10 +567,6 @@ TEST(nsid_userns_separate)
 
 	close(fd_parent_userns);
 	close(fd_child_userns);
-
-	/* Clean up child process */
-	kill(pid, SIGTERM);
-	waitpid(pid, NULL, 0);
 }
 
 TEST(nsid_timens_basic)
@@ -591,7 +595,7 @@ TEST(nsid_timens_basic)
 	close(fd_timens);
 }
 
-TEST(nsid_timens_separate)
+TEST_F(nsid, timens_separate)
 {
 	__u64 parent_time_ns_id = 0;
 	__u64 child_time_ns_id = 0;
@@ -652,6 +656,9 @@ TEST(nsid_timens_separate)
 		}
 	}
 
+	/* Track child for cleanup */
+	self->child_pid = pid;
+
 	/* Parent process */
 	close(pipefd[1]);
 
@@ -660,8 +667,6 @@ TEST(nsid_timens_separate)
 
 	if (buf == 'S') {
 		/* Child couldn't create namespace, skip test */
-		kill(pid, SIGTERM);
-		waitpid(pid, NULL, 0);
 		close(fd_parent_timens);
 		close(pipefd[0]);
 		SKIP(return, "Cannot create time namespace");
@@ -689,10 +694,6 @@ TEST(nsid_timens_separate)
 
 	close(fd_parent_timens);
 	close(fd_child_timens);
-
-	/* Clean up child process */
-	kill(pid, SIGTERM);
-	waitpid(pid, NULL, 0);
 }
 
 TEST(nsid_pidns_basic)
@@ -719,7 +720,7 @@ TEST(nsid_pidns_basic)
 	close(fd_pidns);
 }
 
-TEST(nsid_pidns_separate)
+TEST_F(nsid, pidns_separate)
 {
 	__u64 parent_pid_ns_id = 0;
 	__u64 child_pid_ns_id = 0;
@@ -776,6 +777,9 @@ TEST(nsid_pidns_separate)
 		}
 	}
 
+	/* Track child for cleanup */
+	self->child_pid = pid;
+
 	/* Parent process */
 	close(pipefd[1]);
 
@@ -784,8 +788,6 @@ TEST(nsid_pidns_separate)
 
 	if (buf == 'S') {
 		/* Child couldn't create namespace, skip test */
-		kill(pid, SIGTERM);
-		waitpid(pid, NULL, 0);
 		close(fd_parent_pidns);
 		close(pipefd[0]);
 		SKIP(return, "No permission to create PID namespace");
@@ -813,10 +815,6 @@ TEST(nsid_pidns_separate)
 
 	close(fd_parent_pidns);
 	close(fd_child_pidns);
-
-	/* Clean up child process */
-	kill(pid, SIGTERM);
-	waitpid(pid, NULL, 0);
 }
 
 TEST(nsid_netns_basic)
@@ -860,7 +858,7 @@ TEST(nsid_netns_basic)
 	close(fd_netns);
 }
 
-TEST(nsid_netns_separate)
+TEST_F(nsid, netns_separate)
 {
 	__u64 parent_net_ns_id = 0;
 	__u64 parent_netns_cookie = 0;
@@ -920,6 +918,9 @@ TEST(nsid_netns_separate)
 		_exit(0);
 	}
 
+	/* Track child for cleanup */
+	self->child_pid = pid;
+
 	/* Parent process */
 	close(pipefd[1]);
 
@@ -929,8 +930,6 @@ TEST(nsid_netns_separate)
 
 	if (buf == 'S') {
 		/* Child couldn't create namespace, skip test */
-		kill(pid, SIGTERM);
-		waitpid(pid, NULL, 0);
 		close(fd_parent_netns);
 		close(parent_sock);
 		SKIP(return, "No permission to create network namespace");
@@ -977,10 +976,6 @@ TEST(nsid_netns_separate)
 	close(fd_parent_netns);
 	close(fd_child_netns);
 	close(parent_sock);
-
-	/* Clean up child process */
-	kill(pid, SIGTERM);
-	waitpid(pid, NULL, 0);
 }
 
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c b/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c
new file mode 100644
index 000000000000..753fd29dffd8
--- /dev/null
+++ b/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include "../pidfd/pidfd.h"
+#include "../kselftest_harness.h"
+
+/*
+ * Regression tests for the setns(pidfd) active reference counting bug.
+ *
+ * These tests are based on the reproducers that triggered the race condition
+ * fixed by commit 1c465d0518dc ("ns: handle setns(pidfd, ...) cleanly").
+ *
+ * The bug: When using setns() with a pidfd, if the target task exits between
+ * prepare_nsset() and commit_nsset(), the namespaces would become inactive.
+ * Then ns_ref_active_get() would increment from 0 without properly resurrecting
+ * the owner chain, causing active reference count underflows.
+ */
+
+/*
+ * pidfd setns reproducer using create_child() followed by unshare().
+ * The child exits right after signalling readiness, so the parent's
+ * setns(pidfd) may hit an exited task; per the original report, unfixed
+ * kernels then warned about active refcounts. Result is logged, not asserted.
+ */
+TEST(simple_pidfd_setns)
+{
+	pid_t child_pid;
+	int pidfd = -1;
+	int ret;
+	int sv[2];
+	char c;
+
+	/* Ignore SIGCHLD so that the exited child is reaped automatically */
+	ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+	/* Create the child with no clone flags; it unshares on its own below */
+	child_pid = create_child(&pidfd, 0);
+	ASSERT_GE(child_pid, 0);
+
+	if (child_pid == 0) {
+		close(sv[0]);
+
+		if (unshare(CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUSER) < 0) {
+			close(sv[1]);
+			_exit(1);
+		}
+
+		/* Tell the parent the namespaces exist by sending one byte */
+		if (write_nointr(sv[1], "1", 1) < 0) {
+			close(sv[1]);
+			_exit(1);
+		}
+
+		close(sv[1]);
+		_exit(0);
+	}
+	ASSERT_GE(pidfd, 0);
+	EXPECT_EQ(close(sv[1]), 0);
+
+	ret = read_nointr(sv[0], &c, 1);
+	ASSERT_EQ(ret, 1);
+	EXPECT_EQ(close(sv[0]), 0);
+
+	/* Join the child's UTS and IPC namespaces; the outcome is only logged */
+	ret = setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC);
+	TH_LOG("setns() returned %d", ret);
+	close(pidfd);
+}
+
+/*
+ * pidfd setns reproducer where the namespaces come from the clone flags.
+ *
+ * create_child() is given the CLONE_NEW* flags directly and the child only
+ * sleeps for two seconds; the setns() result is logged, not asserted.
+ */
+TEST(simple_pidfd_setns_clone)
+{
+	pid_t child_pid;
+	int pidfd = -1;
+	int ret;
+
+	/* Ignore SIGCHLD so that the exited child is reaped automatically */
+	ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);
+
+	/* Create a child process with new namespaces using create_child() */
+	child_pid = create_child(&pidfd, CLONE_NEWUSER | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET);
+	ASSERT_GE(child_pid, 0);
+
+	if (child_pid == 0) {
+		/* Child: sleep for a while so parent can setns to us */
+		sleep(2);
+		_exit(0);
+	}
+
+	/* Parent: create_child() is expected to have filled in pidfd */
+	ASSERT_GE(pidfd, 0);
+
+	/* Join the child's UTS and IPC namespaces; the outcome is only logged */
+	ret = setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC);
+	close(pidfd);
+	TH_LOG("setns() returned %d", ret);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/siocgskns_test.c b/tools/testing/selftests/namespaces/siocgskns_test.c
new file mode 100644
index 000000000000..ba689a22d82f
--- /dev/null
+++ b/tools/testing/selftests/namespaces/siocgskns_test.c
@@ -0,0 +1,1824 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/if.h>
+#include <linux/sockios.h>
+#include <linux/nsfs.h>
+#include <arpa/inet.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+#ifndef SIOCGSKNS
+#define SIOCGSKNS 0x894C
+#endif
+
+#ifndef FD_NSFS_ROOT
+#define FD_NSFS_ROOT -10003
+#endif
+
+#ifndef FILEID_NSFS
+#define FILEID_NSFS 0xf1
+#endif
+
+/*
+ * Basic SIOCGSKNS check: the fd returned for a fresh AF_INET TCP socket must
+ * have the same inode number as /proc/self/ns/net. Skips on ENOTTY/EINVAL.
+ */
+TEST(siocgskns_basic)
+{
+	int sock_fd, netns_fd, current_netns_fd;
+	struct stat st1, st2;
+
+	/* Create a TCP socket */
+	sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+	ASSERT_GE(sock_fd, 0);
+
+	/* Use SIOCGSKNS to get an fd for the socket's network namespace */
+	netns_fd = ioctl(sock_fd, SIOCGSKNS);
+	if (netns_fd < 0) {
+		close(sock_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(netns_fd, 0);
+	}
+
+	/* Open the caller's own network namespace for comparison */
+	current_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+	ASSERT_GE(current_netns_fd, 0);
+
+	/* Compare the two namespace fds by inode number */
+	ASSERT_EQ(fstat(netns_fd, &st1), 0);
+	ASSERT_EQ(fstat(current_netns_fd, &st2), 0);
+	ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+	close(sock_fd);
+	close(netns_fd);
+	close(current_netns_fd);
+}
+
+/*
+ * Test that socket file descriptors keep network namespaces active.
+ * A child unshares a netns, creates a socket in it, passes the socket to the
+ * parent via SCM_RIGHTS and exits; the parent then queries it with SIOCGSKNS.
+ */
+TEST(siocgskns_keeps_netns_active)
+{
+	int sock_fd, netns_fd, test_fd;
+	int ipc_sockets[2];
+	pid_t pid;
+	int status;
+	struct stat st;
+
+	ASSERT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child: create new netns and socket */
+		close(ipc_sockets[0]);
+
+		if (unshare(CLONE_NEWNET) < 0) {
+			TH_LOG("unshare(CLONE_NEWNET) failed: %s", strerror(errno));
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		/* Create a socket in the new network namespace */
+		sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+		if (sock_fd < 0) {
+			TH_LOG("socket() failed: %s", strerror(errno));
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		/* Send socket FD to parent via SCM_RIGHTS */
+		struct msghdr msg = {0};
+		struct iovec iov = {0};
+		char buf[1] = {'X'};
+		char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+		iov.iov_base = buf;
+		iov.iov_len = 1;
+		msg.msg_iov = &iov;
+		msg.msg_iovlen = 1;
+		msg.msg_control = cmsg_buf;
+		msg.msg_controllen = sizeof(cmsg_buf);
+
+		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+		memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+		if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+			close(sock_fd);
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		close(sock_fd);
+		close(ipc_sockets[1]);
+		exit(0);
+	}
+
+	/* Parent: receive socket FD */
+	close(ipc_sockets[1]);
+
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	char buf[1];
+	char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+	iov.iov_base = buf;
+	iov.iov_len = 1;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsg_buf;
+	msg.msg_controllen = sizeof(cmsg_buf);
+
+	ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+	close(ipc_sockets[0]);
+	ASSERT_EQ(n, 1);
+
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+	ASSERT_NE(cmsg, NULL);
+	ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS);
+
+	memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+	/* Reap the child; status is only meaningful if waitpid() succeeded */
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* Get network namespace from socket */
+	netns_fd = ioctl(sock_fd, SIOCGSKNS);
+	if (netns_fd < 0) {
+		close(sock_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(netns_fd, 0);
+	}
+
+	ASSERT_EQ(fstat(netns_fd, &st), 0);
+
+	/*
+	 * Namespace should still be active because socket FD keeps it alive.
+	 * Try to access it via /proc/self/fd/<fd>.
+	 */
+	char path[64];
+	snprintf(path, sizeof(path), "/proc/self/fd/%d", netns_fd);
+	test_fd = open(path, O_RDONLY);
+	ASSERT_GE(test_fd, 0);
+	close(test_fd);
+	close(netns_fd);
+
+	/* Close socket - namespace should become inactive */
+	close(sock_fd);
+
+	/* The closed descriptor must now be rejected by SIOCGSKNS */
+	ASSERT_LT(ioctl(sock_fd, SIOCGSKNS), 0);
+}
+
+/*
+ * All of TCP, UDP and (if creatable) raw ICMP sockets must map to one netns.
+ */
+TEST(siocgskns_socket_types)
+{
+	int sock_tcp, sock_udp, sock_raw;
+	int netns_tcp, netns_udp, netns_raw;
+	struct stat st_tcp, st_udp, st_raw;
+
+	/* TCP socket */
+	sock_tcp = socket(AF_INET, SOCK_STREAM, 0);
+	ASSERT_GE(sock_tcp, 0);
+
+	/* UDP socket */
+	sock_udp = socket(AF_INET, SOCK_DGRAM, 0);
+	ASSERT_GE(sock_udp, 0);
+
+	/* RAW socket: creation failure is tolerated, raw checks are skipped */
+	sock_raw = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
+	if (sock_raw < 0 && (errno == EPERM || errno == EACCES)) {
+		sock_raw = -1; /* Skip raw socket test */
+	}
+
+	/* Test SIOCGSKNS on TCP; this first call also decides whether to skip */
+	netns_tcp = ioctl(sock_tcp, SIOCGSKNS);
+	if (netns_tcp < 0) {
+		close(sock_tcp);
+		close(sock_udp);
+		if (sock_raw >= 0) close(sock_raw);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(netns_tcp, 0);
+	}
+
+	/* Test SIOCGSKNS on UDP */
+	netns_udp = ioctl(sock_udp, SIOCGSKNS);
+	ASSERT_GE(netns_udp, 0);
+
+	/* Test SIOCGSKNS on RAW (netns_raw stays unset when there is no raw socket) */
+	if (sock_raw >= 0) {
+		netns_raw = ioctl(sock_raw, SIOCGSKNS);
+		ASSERT_GE(netns_raw, 0);
+	}
+
+	/* Verify all return the same network namespace (compared by inode) */
+	ASSERT_EQ(fstat(netns_tcp, &st_tcp), 0);
+	ASSERT_EQ(fstat(netns_udp, &st_udp), 0);
+	ASSERT_EQ(st_tcp.st_ino, st_udp.st_ino);
+
+	if (sock_raw >= 0) {
+		ASSERT_EQ(fstat(netns_raw, &st_raw), 0);
+		ASSERT_EQ(st_tcp.st_ino, st_raw.st_ino);
+		close(netns_raw);
+		close(sock_raw);
+	}
+
+	close(netns_tcp);
+	close(netns_udp);
+	close(sock_tcp);
+	close(sock_udp);
+}
+
+/*
+ * Test SIOCGSKNS across a namespace switch.
+ * Create a socket in netns A, move to a new netns B via unshare(), verify
+ * SIOCGSKNS still returns netns A, then setns() back to A.
+ */
+TEST(siocgskns_across_setns)
+{
+	int sock_fd, netns_a_fd, netns_b_fd, result_fd, saved_errno;
+	struct stat st_a, st_result_stat;
+
+	/* Get current netns (A) */
+	netns_a_fd = open("/proc/self/ns/net", O_RDONLY);
+	ASSERT_GE(netns_a_fd, 0);
+	ASSERT_EQ(fstat(netns_a_fd, &st_a), 0);
+
+	/* Create socket in netns A */
+	sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+	ASSERT_GE(sock_fd, 0);
+
+	/* Create new netns (B) */
+	ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+
+	netns_b_fd = open("/proc/self/ns/net", O_RDONLY);
+	ASSERT_GE(netns_b_fd, 0);
+
+	/* Get netns from socket created in A */
+	result_fd = ioctl(sock_fd, SIOCGSKNS);
+	if (result_fd < 0) {
+		saved_errno = errno; /* cleanup below may overwrite errno */
+		close(sock_fd);
+		setns(netns_a_fd, CLONE_NEWNET);
+		close(netns_a_fd);
+		close(netns_b_fd);
+		if (saved_errno == ENOTTY || saved_errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(result_fd, 0);
+	}
+
+	/* Verify it still points to netns A */
+	ASSERT_EQ(fstat(result_fd, &st_result_stat), 0);
+	ASSERT_EQ(st_a.st_ino, st_result_stat.st_ino);
+
+	close(result_fd);
+	close(sock_fd);
+	close(netns_b_fd);
+
+	/* Restore original netns */
+	ASSERT_EQ(setns(netns_a_fd, CLONE_NEWNET), 0);
+	close(netns_a_fd);
+}
+
+/*
+ * Test SIOCGSKNS fails with ENOTTY or EINVAL on non-socket file descriptors.
+ */
+TEST(siocgskns_non_socket)
+{
+	int fd;
+	int pipefd[2];
+
+	/* Test on a non-socket file (/dev/null) */
+	fd = open("/dev/null", O_RDONLY);
+	ASSERT_GE(fd, 0);
+
+	ASSERT_LT(ioctl(fd, SIOCGSKNS), 0);
+	ASSERT_TRUE(errno == ENOTTY || errno == EINVAL);
+	close(fd);
+
+	/* Test on the read end of a pipe */
+	ASSERT_EQ(pipe(pipefd), 0);
+
+	ASSERT_LT(ioctl(pipefd[0], SIOCGSKNS), 0);
+	ASSERT_TRUE(errno == ENOTTY || errno == EINVAL);
+
+	close(pipefd[0]);
+	close(pipefd[1]);
+}
+
+/*
+ * Test multiple sockets keep the same network namespace active.
+ * Create multiple sockets, verify closing some doesn't affect others.
+ */
+TEST(siocgskns_multiple_sockets)
+{
+	int socks[5];
+	int netns_fds[5];
+	int i;
+	struct stat st;
+	ino_t netns_ino;
+
+	/* Create new network namespace */
+	ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+
+	/* Create multiple sockets */
+	for (i = 0; i < 5; i++) {
+		socks[i] = socket(AF_INET, SOCK_STREAM, 0);
+		ASSERT_GE(socks[i], 0);
+	}
+
+	/* Get netns from all sockets; on failure release every fd opened so far */
+	for (i = 0; i < 5; i++) {
+		netns_fds[i] = ioctl(socks[i], SIOCGSKNS);
+		if (netns_fds[i] < 0) {
+			int saved_errno = errno, j;
+			for (j = 0; j < 5; j++) {
+				close(socks[j]);
+				if (j < i)
+					close(netns_fds[j]);
+			}
+			if (saved_errno == ENOTTY || saved_errno == EINVAL)
+				SKIP(return, "SIOCGSKNS not supported");
+			ASSERT_GE(netns_fds[i], 0);
+		}
+	}
+
+	/* Verify all point to same netns */
+	ASSERT_EQ(fstat(netns_fds[0], &st), 0);
+	netns_ino = st.st_ino;
+
+	for (i = 1; i < 5; i++) {
+		ASSERT_EQ(fstat(netns_fds[i], &st), 0);
+		ASSERT_EQ(st.st_ino, netns_ino);
+	}
+
+	/* Close some sockets */
+	for (i = 0; i < 3; i++) {
+		close(socks[i]);
+	}
+
+	/* Remaining netns FDs should still be valid */
+	for (i = 3; i < 5; i++) {
+		char path[64];
+		snprintf(path, sizeof(path), "/proc/self/fd/%d", netns_fds[i]);
+		int test_fd = open(path, O_RDONLY);
+		ASSERT_GE(test_fd, 0);
+		close(test_fd);
+	}
+
+	/* Cleanup: sockets 0-2 were already closed above */
+	for (i = 0; i < 5; i++) {
+		if (i >= 3)
+			close(socks[i]);
+		close(netns_fds[i]);
+	}
+}
+
+/*
+ * Test socket keeps netns active after creating process exits.
+ * Verify that as long as the socket FD exists, the namespace remains active.
+ */
+TEST(siocgskns_netns_lifecycle)
+{
+	int sock_fd, netns_fd;
+	int ipc_sockets[2];
+	int syncpipe[2];
+	pid_t pid;
+	int status;
+	char sync_byte;
+	struct stat st;
+	ino_t netns_ino;
+
+	ASSERT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+	ASSERT_EQ(pipe(syncpipe), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child */
+		close(ipc_sockets[0]);
+		close(syncpipe[1]);
+
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(ipc_sockets[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+
+		sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+		if (sock_fd < 0) {
+			close(ipc_sockets[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+
+		/* Send socket to parent */
+		struct msghdr msg = {0};
+		struct iovec iov = {0};
+		char buf[1] = {'X'};
+		char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+		iov.iov_base = buf;
+		iov.iov_len = 1;
+		msg.msg_iov = &iov;
+		msg.msg_iovlen = 1;
+		msg.msg_control = cmsg_buf;
+		msg.msg_controllen = sizeof(cmsg_buf);
+
+		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+		memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+		if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+			close(sock_fd);
+			close(ipc_sockets[1]);
+			close(syncpipe[0]);
+			exit(1);
+		}
+
+		close(sock_fd);
+		close(ipc_sockets[1]);
+
+		/* Block until the parent writes a byte or closes its pipe end */
+		read(syncpipe[0], &sync_byte, 1);
+		close(syncpipe[0]);
+		exit(0);
+	}
+
+	/* Parent */
+	close(ipc_sockets[1]);
+	close(syncpipe[0]);
+
+	/* Receive socket FD */
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	char buf[1];
+	char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+	iov.iov_base = buf;
+	iov.iov_len = 1;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsg_buf;
+	msg.msg_controllen = sizeof(cmsg_buf);
+
+	ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+	close(ipc_sockets[0]);
+	ASSERT_EQ(n, 1);
+
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+	ASSERT_NE(cmsg, NULL);
+	memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+	/* Get netns from socket while child is alive */
+	netns_fd = ioctl(sock_fd, SIOCGSKNS);
+	if (netns_fd < 0) {
+		sync_byte = 'G';
+		write(syncpipe[1], &sync_byte, 1);
+		close(syncpipe[1]);
+		close(sock_fd);
+		waitpid(pid, NULL, 0);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(netns_fd, 0);
+	}
+	ASSERT_EQ(fstat(netns_fd, &st), 0);
+	netns_ino = st.st_ino;
+
+	/* Signal child to exit; a failed write would leave it unsignalled */
+	sync_byte = 'G';
+	ASSERT_EQ(write(syncpipe[1], &sync_byte, 1), 1);
+	close(syncpipe[1]);
+
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFEXITED(status));
+
+	/*
+	 * Socket FD should still keep namespace active even after
+	 * the creating process exited.
+	 */
+	int test_fd = ioctl(sock_fd, SIOCGSKNS);
+	ASSERT_GE(test_fd, 0);
+
+	struct stat st_test;
+	ASSERT_EQ(fstat(test_fd, &st_test), 0);
+	ASSERT_EQ(st_test.st_ino, netns_ino);
+
+	close(test_fd);
+	close(netns_fd);
+
+	/* Close socket - namespace should become inactive */
+	close(sock_fd);
+}
+
+/*
+ * Test IPv6 sockets also work with SIOCGSKNS (same check as the IPv4 basic test).
+ */
+TEST(siocgskns_ipv6)
+{
+	int sock_fd, netns_fd, current_netns_fd;
+	struct stat st1, st2;
+
+	/* Create an IPv6 TCP socket */
+	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	ASSERT_GE(sock_fd, 0);
+
+	/* Use SIOCGSKNS; ENOTTY/EINVAL is treated as "not supported" */
+	netns_fd = ioctl(sock_fd, SIOCGSKNS);
+	if (netns_fd < 0) {
+		close(sock_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(netns_fd, 0);
+	}
+
+	/* Verify it matches current namespace (compared by inode number) */
+	current_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+	ASSERT_GE(current_netns_fd, 0);
+
+	ASSERT_EQ(fstat(netns_fd, &st1), 0);
+	ASSERT_EQ(fstat(current_netns_fd, &st2), 0);
+	ASSERT_EQ(st1.st_ino, st2.st_ino);
+
+	close(sock_fd);
+	close(netns_fd);
+	close(current_netns_fd);
+}
+
+/*
+ * Test that socket-kept netns appears in listns() output.
+ * Verify that a network namespace kept alive by a socket FD appears in
+ * listns() output even after the creating process exits, and that it
+ * disappears when the socket is closed.
+ */
+TEST(siocgskns_listns_visibility)
+{
+	int sock_fd, netns_fd, owner_fd;
+	int ipc_sockets[2];
+	pid_t pid;
+	int status;
+	__u64 netns_id, owner_id;
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWNET,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[256];
+	int ret, i;
+	bool found_netns = false;
+
+	ASSERT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child: create new netns and socket */
+		close(ipc_sockets[0]);
+
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+		if (sock_fd < 0) {
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		/* Send socket FD to parent via SCM_RIGHTS */
+		struct msghdr msg = {0};
+		struct iovec iov = {0};
+		char buf[1] = {'X'};
+		char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+		iov.iov_base = buf;
+		iov.iov_len = 1;
+		msg.msg_iov = &iov;
+		msg.msg_iovlen = 1;
+		msg.msg_control = cmsg_buf;
+		msg.msg_controllen = sizeof(cmsg_buf);
+
+		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+		memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+		if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+			close(sock_fd);
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		close(sock_fd);
+		close(ipc_sockets[1]);
+		exit(0);
+	}
+
+	/* Parent: receive socket FD */
+	close(ipc_sockets[1]);
+
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	char buf[1];
+	char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+	iov.iov_base = buf;
+	iov.iov_len = 1;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsg_buf;
+	msg.msg_controllen = sizeof(cmsg_buf);
+
+	ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+	close(ipc_sockets[0]);
+	ASSERT_EQ(n, 1);
+
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+	ASSERT_NE(cmsg, NULL);
+	memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+	/* Reap the child; status is only meaningful if waitpid() succeeded */
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* Get network namespace from socket */
+	netns_fd = ioctl(sock_fd, SIOCGSKNS);
+	if (netns_fd < 0) {
+		close(sock_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(netns_fd, 0);
+	}
+
+	/* Get namespace ID */
+	ret = ioctl(netns_fd, NS_GET_ID, &netns_id);
+	if (ret < 0) {
+		close(sock_fd);
+		close(netns_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "NS_GET_ID not supported");
+		ASSERT_EQ(ret, 0);
+	}
+
+	/* Get owner user namespace */
+	owner_fd = ioctl(netns_fd, NS_GET_USERNS);
+	if (owner_fd < 0) {
+		close(sock_fd);
+		close(netns_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "NS_GET_USERNS not supported");
+		ASSERT_GE(owner_fd, 0);
+	}
+
+	/* Get owner namespace ID */
+	ret = ioctl(owner_fd, NS_GET_ID, &owner_id);
+	if (ret < 0) {
+		close(owner_fd);
+		close(sock_fd);
+		close(netns_fd);
+		ASSERT_EQ(ret, 0);
+	}
+	close(owner_fd);
+
+	/* Namespace should appear in listns() output */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		close(sock_fd);
+		close(netns_fd);
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		TH_LOG("listns failed: %s", strerror(errno));
+		ASSERT_GE(ret, 0);
+	}
+
+	/* Search for our network namespace in the list */
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == netns_id) {
+			found_netns = true;
+			break;
+		}
+	}
+
+	ASSERT_TRUE(found_netns);
+	TH_LOG("Found netns %llu in listns() output (kept alive by socket)", netns_id);
+
+	/* Now verify with owner filtering (req.user_ns_id set to the owner's ID) */
+	req.user_ns_id = owner_id;
+	found_netns = false;
+
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	ASSERT_GE(ret, 0);
+
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == netns_id) {
+			found_netns = true;
+			break;
+		}
+	}
+
+	ASSERT_TRUE(found_netns);
+	TH_LOG("Found netns %llu owned by userns %llu", netns_id, owner_id);
+
+	/* Close socket - namespace should become inactive and disappear from listns() */
+	close(sock_fd);
+	close(netns_fd);
+
+	/* Verify it's no longer in listns() output (owner filter cleared again) */
+	req.user_ns_id = 0;
+	found_netns = false;
+
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	ASSERT_GE(ret, 0);
+
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == netns_id) {
+			found_netns = true;
+			break;
+		}
+	}
+
+	ASSERT_FALSE(found_netns);
+	TH_LOG("Netns %llu correctly disappeared from listns() after socket closed", netns_id);
+}
+
+/*
+ * Test that socket-kept netns can be reopened via file handle.
+ * Verify that a network namespace kept alive by a socket FD can be
+ * reopened using file handles even after the creating process exits.
+ */
+TEST(siocgskns_file_handle)
+{
+	int sock_fd, netns_fd, reopened_fd;
+	int ipc_sockets[2];
+	pid_t pid;
+	int status;
+	struct stat st1, st2;
+	ino_t netns_ino;
+	__u64 netns_id;
+	struct file_handle *handle;
+	struct nsfs_file_handle *nsfs_fh;
+	int ret;
+
+	/* Allocate file_handle structure for nsfs */
+	handle = malloc(sizeof(struct file_handle) + sizeof(struct nsfs_file_handle));
+	ASSERT_NE(handle, NULL);
+	handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	handle->handle_type = FILEID_NSFS;
+
+	ASSERT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child: create new netns and socket */
+		close(ipc_sockets[0]);
+
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+		if (sock_fd < 0) {
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		/* Send socket FD to parent via SCM_RIGHTS */
+		struct msghdr msg = {0};
+		struct iovec iov = {0};
+		char buf[1] = {'X'};
+		char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+		iov.iov_base = buf;
+		iov.iov_len = 1;
+		msg.msg_iov = &iov;
+		msg.msg_iovlen = 1;
+		msg.msg_control = cmsg_buf;
+		msg.msg_controllen = sizeof(cmsg_buf);
+
+		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+		memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+		if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+			close(sock_fd);
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		close(sock_fd);
+		close(ipc_sockets[1]);
+		exit(0);
+	}
+
+	/* Parent: receive socket FD */
+	close(ipc_sockets[1]);
+
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	char buf[1];
+	char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+	iov.iov_base = buf;
+	iov.iov_len = 1;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsg_buf;
+	msg.msg_controllen = sizeof(cmsg_buf);
+
+	ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+	close(ipc_sockets[0]);
+	ASSERT_EQ(n, 1);
+
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+	ASSERT_NE(cmsg, NULL);
+	memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+	/* Reap the child; status is only meaningful if waitpid() succeeded */
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* Get network namespace from socket */
+	netns_fd = ioctl(sock_fd, SIOCGSKNS);
+	if (netns_fd < 0) {
+		free(handle);
+		close(sock_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(netns_fd, 0);
+	}
+
+	ASSERT_EQ(fstat(netns_fd, &st1), 0);
+	netns_ino = st1.st_ino;
+
+	/* Get namespace ID */
+	ret = ioctl(netns_fd, NS_GET_ID, &netns_id);
+	if (ret < 0) {
+		free(handle);
+		close(sock_fd);
+		close(netns_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "NS_GET_ID not supported");
+		ASSERT_EQ(ret, 0);
+	}
+
+	/* Construct file handle from namespace ID */
+	nsfs_fh = (struct nsfs_file_handle *)handle->f_handle;
+	nsfs_fh->ns_id = netns_id;
+	nsfs_fh->ns_type = 0;  /* Type field not needed for reopening */
+	nsfs_fh->ns_inum = 0;  /* Inum field not needed for reopening */
+
+	TH_LOG("Constructed file handle for netns %lu (id=%llu)", netns_ino, netns_id);
+
+	/* Reopen namespace using file handle (netns_fd and the socket are both open) */
+	reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	if (reopened_fd < 0) {
+		free(handle);
+		close(sock_fd);
+		if (errno == EOPNOTSUPP || errno == ENOSYS || errno == EBADF)
+			SKIP(return, "open_by_handle_at with FD_NSFS_ROOT not supported");
+		TH_LOG("open_by_handle_at failed: %s", strerror(errno));
+		ASSERT_GE(reopened_fd, 0);
+	}
+
+	/* Verify it's the same namespace */
+	ASSERT_EQ(fstat(reopened_fd, &st2), 0);
+	ASSERT_EQ(st1.st_ino, st2.st_ino);
+	ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+	TH_LOG("Successfully reopened netns %lu via file handle", netns_ino);
+
+	close(reopened_fd);
+
+	/* Close the netns FD; only the socket references the namespace now */
+	close(netns_fd);
+
+	/* Try to reopen via file handle - should fail since namespace is now inactive */
+	reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_LT(reopened_fd, 0);
+	TH_LOG("Correctly failed to reopen inactive netns: %s", strerror(errno));
+
+	/* Re-acquire a netns fd from the still-open socket */
+	netns_fd = ioctl(sock_fd, SIOCGSKNS);
+	if (netns_fd < 0) {
+		free(handle);
+		close(sock_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(netns_fd, 0);
+	}
+
+	/* With a netns fd held again, the same handle is expected to resolve */
+	reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	if (reopened_fd < 0) {
+		free(handle);
+		close(sock_fd);
+		if (errno == EOPNOTSUPP || errno == ENOSYS || errno == EBADF)
+			SKIP(return, "open_by_handle_at with FD_NSFS_ROOT not supported");
+		TH_LOG("open_by_handle_at failed: %s", strerror(errno));
+		ASSERT_GE(reopened_fd, 0);
+	}
+
+	/* Verify it's the same namespace */
+	ASSERT_EQ(fstat(reopened_fd, &st2), 0);
+	ASSERT_EQ(st1.st_ino, st2.st_ino);
+	ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+	TH_LOG("Successfully reopened netns %lu via file handle", netns_ino);
+
+	/* Close the socket and free the handle (other fds end with the test process) */
+	close(sock_fd);
+	free(handle);
+}
+
+/*
+ * Test combined listns() and file handle operations with socket-kept netns.
+ * Create a netns, keep it alive with a socket, verify it appears in listns(),
+ * then reopen it via file handle obtained from listns() entry.
+ */
+TEST(siocgskns_listns_and_file_handle)
+{
+	int sock_fd, netns_fd, userns_fd, reopened_fd;
+	int ipc_sockets[2];
+	pid_t pid;
+	int status;
+	struct stat st;
+	ino_t netns_ino;
+	__u64 netns_id, userns_id;
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWNET | CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[256];
+	int ret, i;
+	bool found_netns = false, found_userns = false;
+	struct file_handle *handle;
+	struct nsfs_file_handle *nsfs_fh;
+
+	/* Allocate file_handle structure for nsfs */
+	handle = malloc(sizeof(struct file_handle) + sizeof(struct nsfs_file_handle));
+	ASSERT_NE(handle, NULL);
+	handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	handle->handle_type = FILEID_NSFS;
+
+	EXPECT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		/* Child: create new userns and netns with socket */
+		close(ipc_sockets[0]);
+
+		if (setup_userns() < 0) {
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		if (unshare(CLONE_NEWNET) < 0) {
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+		if (sock_fd < 0) {
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		/* Send socket FD to parent via SCM_RIGHTS */
+		struct msghdr msg = {0};
+		struct iovec iov = {0};
+		char buf[1] = {'X'};
+		char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+		iov.iov_base = buf;
+		iov.iov_len = 1;
+		msg.msg_iov = &iov;
+		msg.msg_iovlen = 1;
+		msg.msg_control = cmsg_buf;
+		msg.msg_controllen = sizeof(cmsg_buf);
+
+		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+		memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+		if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+			close(sock_fd);
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		close(sock_fd);
+		close(ipc_sockets[1]);
+		exit(0);
+	}
+
+	/* Parent: receive socket FD */
+	close(ipc_sockets[1]);
+
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	char buf[1];
+	char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+	iov.iov_base = buf;
+	iov.iov_len = 1;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsg_buf;
+	msg.msg_controllen = sizeof(cmsg_buf);
+
+	ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+	close(ipc_sockets[0]);
+	ASSERT_EQ(n, 1);
+
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+	ASSERT_NE(cmsg, NULL);
+	memcpy(&sock_fd, CMSG_DATA(cmsg), sizeof(int));
+
+	/* Wait for child to exit */
+	waitpid(pid, &status, 0);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/* Get network namespace from socket */
+	netns_fd = ioctl(sock_fd, SIOCGSKNS);
+	if (netns_fd < 0) {
+		free(handle);
+		close(sock_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(netns_fd, 0);
+	}
+
+	ASSERT_EQ(fstat(netns_fd, &st), 0);
+	netns_ino = st.st_ino;
+
+	/* Get namespace ID */
+	ret = ioctl(netns_fd, NS_GET_ID, &netns_id);
+	if (ret < 0) {
+		free(handle);
+		close(sock_fd);
+		close(netns_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "NS_GET_ID not supported");
+		ASSERT_EQ(ret, 0);
+	}
+
+	/* Get owner user namespace */
+	userns_fd = ioctl(netns_fd, NS_GET_USERNS);
+	if (userns_fd < 0) {
+		free(handle);
+		close(sock_fd);
+		close(netns_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "NS_GET_USERNS not supported");
+		ASSERT_GE(userns_fd, 0);
+	}
+
+	/* Get owner namespace ID */
+	ret = ioctl(userns_fd, NS_GET_ID, &userns_id);
+	if (ret < 0) {
+		close(userns_fd);
+		free(handle);
+		close(sock_fd);
+		close(netns_fd);
+		ASSERT_EQ(ret, 0);
+	}
+	close(userns_fd);
+
+	TH_LOG("Testing netns %lu (id=%llu) owned by userns id=%llu", netns_ino, netns_id, userns_id);
+
+	/* Verify namespace appears in listns() */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		free(handle);
+		close(sock_fd);
+		close(netns_fd);
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		TH_LOG("listns failed: %s", strerror(errno));
+		ASSERT_GE(ret, 0);
+	}
+
+	found_netns = false;
+	found_userns = false;
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == netns_id)
+			found_netns = true;
+		if (ns_ids[i] == userns_id)
+			found_userns = true;
+	}
+	ASSERT_TRUE(found_netns);
+	ASSERT_TRUE(found_userns);
+	TH_LOG("Found netns %llu in listns() output", netns_id);
+
+	/* Construct file handle from namespace ID */
+	nsfs_fh = (struct nsfs_file_handle *)handle->f_handle;
+	nsfs_fh->ns_id = netns_id;
+	nsfs_fh->ns_type = 0;
+	nsfs_fh->ns_inum = 0;
+
+	reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	if (reopened_fd < 0) {
+		free(handle);
+		close(sock_fd);
+		if (errno == EOPNOTSUPP || errno == ENOSYS || errno == EBADF)
+			SKIP(return, "open_by_handle_at with FD_NSFS_ROOT not supported");
+		TH_LOG("open_by_handle_at failed: %s", strerror(errno));
+		ASSERT_GE(reopened_fd, 0);
+	}
+
+	struct stat reopened_st;
+	ASSERT_EQ(fstat(reopened_fd, &reopened_st), 0);
+	ASSERT_EQ(reopened_st.st_ino, netns_ino);
+
+	TH_LOG("Successfully reopened netns %lu via file handle (socket-kept)", netns_ino);
+
+	close(reopened_fd);
+	close(netns_fd);
+
+	/* Try to reopen via file handle - should fail since namespace is now inactive */
+	reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	ASSERT_LT(reopened_fd, 0);
+	TH_LOG("Correctly failed to reopen inactive netns: %s", strerror(errno));
+
+	/* Get network namespace from socket */
+	netns_fd = ioctl(sock_fd, SIOCGSKNS);
+	if (netns_fd < 0) {
+		free(handle);
+		close(sock_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(netns_fd, 0);
+	}
+
+	/* Verify namespace appears in listns() */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		free(handle);
+		close(sock_fd);
+		close(netns_fd);
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		TH_LOG("listns failed: %s", strerror(errno));
+		ASSERT_GE(ret, 0);
+	}
+
+	found_netns = false;
+	found_userns = false;
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == netns_id)
+			found_netns = true;
+		if (ns_ids[i] == userns_id)
+			found_userns = true;
+	}
+	ASSERT_TRUE(found_netns);
+	ASSERT_TRUE(found_userns);
+	TH_LOG("Found netns %llu in listns() output", netns_id);
+
+	close(netns_fd);
+
+	/* Verify namespace appears in listns() */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		free(handle);
+		close(sock_fd);
+		close(netns_fd);
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		TH_LOG("listns failed: %s", strerror(errno));
+		ASSERT_GE(ret, 0);
+	}
+
+	found_netns = false;
+	found_userns = false;
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == netns_id)
+			found_netns = true;
+		if (ns_ids[i] == userns_id)
+			found_userns = true;
+	}
+	ASSERT_FALSE(found_netns);
+	ASSERT_FALSE(found_userns);
+	TH_LOG("Netns %llu correctly disappeared from listns() after socket closed", netns_id);
+
+	close(sock_fd);
+	free(handle);
+}
+
+/*
+ * Test multi-level namespace resurrection across three user namespace levels.
+ *
+ * This test creates a complex namespace hierarchy with three levels of user
+ * namespaces and a network namespace at the deepest level. It verifies that
+ * the resurrection semantics work correctly when SIOCGSKNS is called on a
+ * socket from an inactive namespace tree, and that listns() and
+ * open_by_handle_at() correctly respect visibility rules.
+ *
+ * Hierarchy after child processes exit (all with 0 active refcount):
+ *
+ *          net_L3A (0)                <- Level 3 network namespace
+ *              |
+ *              +
+ *          userns_L3 (0)              <- Level 3 user namespace
+ *              |
+ *              +
+ *          userns_L2 (0)              <- Level 2 user namespace
+ *              |
+ *              +
+ *          userns_L1 (0)              <- Level 1 user namespace
+ *              |
+ *              x
+ *          init_user_ns
+ *
+ * The test verifies:
+ * 1. SIOCGSKNS on a socket from inactive net_L3A resurrects the entire chain
+ * 2. After resurrection, all namespaces are visible in listns()
+ * 3. Resurrected namespaces can be reopened via file handles
+ * 4. Closing the netns FD cascades down: the entire ownership chain
+ *    (userns_L3 -> userns_L2 -> userns_L1) becomes inactive again
+ * 5. Inactive namespaces disappear from listns() and cannot be reopened
+ * 6. Calling SIOCGSKNS again on the same socket resurrects the tree again
+ * 7. After second resurrection, namespaces are visible and can be reopened
+ */
+TEST(siocgskns_multilevel_resurrection)
+{
+	int ipc_sockets[2];
+	pid_t pid_l1, pid_l2, pid_l3;
+	int status;
+
+	/* Socket passed up from the L3 child and the netns FD derived from it */
+	int sock_L3A_fd = -1;
+	int netns_L3A_fd = -1;
+	__u64 netns_L3A_id;
+	__u64 userns_L1_id, userns_L2_id, userns_L3_id;
+
+	/* For listns() and file handle testing */
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWNET | CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids[256];
+	int ret, i;
+	struct file_handle *handle;
+	struct nsfs_file_handle *nsfs_fh;
+	int reopened_fd;
+
+	/* Allocate file handle for testing */
+	handle = malloc(sizeof(struct file_handle) + sizeof(struct nsfs_file_handle));
+	ASSERT_NE(handle, NULL);
+	handle->handle_bytes = sizeof(struct nsfs_file_handle);
+	handle->handle_type = FILEID_NSFS;
+
+	ASSERT_EQ(socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0);
+
+	/*
+	 * Fork level 1 child that creates userns_L1
+	 */
+	pid_l1 = fork();
+	ASSERT_GE(pid_l1, 0);
+
+	if (pid_l1 == 0) {
+		/* Level 1 child */
+		int ipc_L2[2];
+		close(ipc_sockets[0]);
+
+		/* Create userns_L1 */
+		if (setup_userns() < 0) {
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		/* Create socketpair for communicating with L2 child */
+		if (socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_L2) < 0) {
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		/*
+		 * Fork level 2 child that creates userns_L2
+		 */
+		pid_l2 = fork();
+		if (pid_l2 < 0) {
+			close(ipc_sockets[1]);
+			close(ipc_L2[0]);
+			close(ipc_L2[1]);
+			exit(1);
+		}
+
+		if (pid_l2 == 0) {
+			/* Level 2 child */
+			int ipc_L3[2];
+			close(ipc_L2[0]);
+
+			/* Create userns_L2 (nested inside userns_L1) */
+			if (setup_userns() < 0) {
+				close(ipc_L2[1]);
+				exit(1);
+			}
+
+			/* Create socketpair for communicating with L3 child */
+			if (socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_L3) < 0) {
+				close(ipc_L2[1]);
+				exit(1);
+			}
+
+			/*
+			 * Fork level 3 child that creates userns_L3 and the net_L3A netns
+			 */
+			pid_l3 = fork();
+			if (pid_l3 < 0) {
+				close(ipc_L2[1]);
+				close(ipc_L3[0]);
+				close(ipc_L3[1]);
+				exit(1);
+			}
+
+			if (pid_l3 == 0) {
+				/* Level 3 child - the deepest level */
+				int sock_fd;
+				close(ipc_L3[0]);
+
+				/* Create userns_L3 (nested inside userns_L2) */
+				if (setup_userns() < 0) {
+					close(ipc_L3[1]);
+					exit(1);
+				}
+
+				/* Create network namespace at level 3 */
+				if (unshare(CLONE_NEWNET) < 0) {
+					close(ipc_L3[1]);
+					exit(1);
+				}
+
+				/* Create socket in net_L3A */
+				sock_fd = socket(AF_INET, SOCK_DGRAM, 0);
+				if (sock_fd < 0) {
+					close(ipc_L3[1]);
+					exit(1);
+				}
+
+				/* Send socket FD to L2 parent */
+				struct msghdr msg = {0};
+				struct iovec iov = {0};
+				char buf[1] = {'X'};
+				char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+				iov.iov_base = buf;
+				iov.iov_len = 1;
+				msg.msg_iov = &iov;
+				msg.msg_iovlen = 1;
+				msg.msg_control = cmsg_buf;
+				msg.msg_controllen = sizeof(cmsg_buf);
+
+				struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+				cmsg->cmsg_level = SOL_SOCKET;
+				cmsg->cmsg_type = SCM_RIGHTS;
+				cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+				memcpy(CMSG_DATA(cmsg), &sock_fd, sizeof(int));
+
+				if (sendmsg(ipc_L3[1], &msg, 0) < 0) {
+					close(sock_fd);
+					close(ipc_L3[1]);
+					exit(1);
+				}
+
+				close(sock_fd);
+				close(ipc_L3[1]);
+				exit(0);
+			}
+
+			/* Level 2 child - receive from L3 and forward to L1 */
+			close(ipc_L3[1]);
+
+			struct msghdr msg = {0};
+			struct iovec iov = {0};
+			char buf[1];
+			char cmsg_buf[CMSG_SPACE(sizeof(int))];
+			int received_fd;
+
+			iov.iov_base = buf;
+			iov.iov_len = 1;
+			msg.msg_iov = &iov;
+			msg.msg_iovlen = 1;
+			msg.msg_control = cmsg_buf;
+			msg.msg_controllen = sizeof(cmsg_buf);
+
+			ssize_t n = recvmsg(ipc_L3[0], &msg, 0);
+			close(ipc_L3[0]);
+
+			if (n != 1) {
+				close(ipc_L2[1]);
+				waitpid(pid_l3, NULL, 0);
+				exit(1);
+			}
+
+			struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+			if (!cmsg) {
+				close(ipc_L2[1]);
+				waitpid(pid_l3, NULL, 0);
+				exit(1);
+			}
+			memcpy(&received_fd, CMSG_DATA(cmsg), sizeof(int));
+
+			/* Wait for L3 child */
+			waitpid(pid_l3, NULL, 0);
+
+			/* Forward the socket FD to L1 parent */
+			memset(&msg, 0, sizeof(msg));
+			buf[0] = 'Y';
+			iov.iov_base = buf;
+			iov.iov_len = 1;
+			msg.msg_iov = &iov;
+			msg.msg_iovlen = 1;
+			msg.msg_control = cmsg_buf;
+			msg.msg_controllen = sizeof(cmsg_buf);
+
+			cmsg = CMSG_FIRSTHDR(&msg);
+			cmsg->cmsg_level = SOL_SOCKET;
+			cmsg->cmsg_type = SCM_RIGHTS;
+			cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+			memcpy(CMSG_DATA(cmsg), &received_fd, sizeof(int));
+
+			if (sendmsg(ipc_L2[1], &msg, 0) < 0) {
+				close(received_fd);
+				close(ipc_L2[1]);
+				exit(1);
+			}
+
+			close(received_fd);
+			close(ipc_L2[1]);
+			exit(0);
+		}
+
+		/* Level 1 child - receive from L2 and forward to parent */
+		close(ipc_L2[1]);
+
+		struct msghdr msg = {0};
+		struct iovec iov = {0};
+		char buf[1];
+		char cmsg_buf[CMSG_SPACE(sizeof(int))];
+		int received_fd;
+
+		iov.iov_base = buf;
+		iov.iov_len = 1;
+		msg.msg_iov = &iov;
+		msg.msg_iovlen = 1;
+		msg.msg_control = cmsg_buf;
+		msg.msg_controllen = sizeof(cmsg_buf);
+
+		ssize_t n = recvmsg(ipc_L2[0], &msg, 0);
+		close(ipc_L2[0]);
+
+		if (n != 1) {
+			close(ipc_sockets[1]);
+			waitpid(pid_l2, NULL, 0);
+			exit(1);
+		}
+
+		struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+		if (!cmsg) {
+			close(ipc_sockets[1]);
+			waitpid(pid_l2, NULL, 0);
+			exit(1);
+		}
+		memcpy(&received_fd, CMSG_DATA(cmsg), sizeof(int));
+
+		/* Wait for L2 child */
+		waitpid(pid_l2, NULL, 0);
+
+		/* Forward the socket FD to parent */
+		memset(&msg, 0, sizeof(msg));
+		buf[0] = 'Z';
+		iov.iov_base = buf;
+		iov.iov_len = 1;
+		msg.msg_iov = &iov;
+		msg.msg_iovlen = 1;
+		msg.msg_control = cmsg_buf;
+		msg.msg_controllen = sizeof(cmsg_buf);
+
+		cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+		memcpy(CMSG_DATA(cmsg), &received_fd, sizeof(int));
+
+		if (sendmsg(ipc_sockets[1], &msg, 0) < 0) {
+			close(received_fd);
+			close(ipc_sockets[1]);
+			exit(1);
+		}
+
+		close(received_fd);
+		close(ipc_sockets[1]);
+		exit(0);
+	}
+
+	/* Parent - receive the socket from the deepest level */
+	close(ipc_sockets[1]);
+
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	char buf[1];
+	char cmsg_buf[CMSG_SPACE(sizeof(int))];
+
+	iov.iov_base = buf;
+	iov.iov_len = 1;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsg_buf;
+	msg.msg_controllen = sizeof(cmsg_buf);
+
+	ssize_t n = recvmsg(ipc_sockets[0], &msg, 0);
+	close(ipc_sockets[0]);
+
+	if (n != 1) {
+		free(handle);
+		waitpid(pid_l1, NULL, 0);
+		SKIP(return, "Failed to receive socket from child");
+	}
+
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+	if (!cmsg) {
+		free(handle);
+		waitpid(pid_l1, NULL, 0);
+		SKIP(return, "Failed to receive socket from child");
+	}
+	memcpy(&sock_L3A_fd, CMSG_DATA(cmsg), sizeof(int));
+
+	/* Reap the L1 child; status is only meaningful if waitpid() succeeded */
+	ASSERT_EQ(waitpid(pid_l1, &status, 0), pid_l1);
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(WEXITSTATUS(status), 0);
+
+	/*
+	 * At this point, all child processes have exited. The socket itself
+	 * doesn't keep the namespace active - we need to call SIOCGSKNS which
+	 * will resurrect the entire namespace tree by taking active references.
+	 */
+
+	/* Get network namespace from socket - this resurrects the tree */
+	netns_L3A_fd = ioctl(sock_L3A_fd, SIOCGSKNS);
+	if (netns_L3A_fd < 0) {
+		free(handle);
+		close(sock_L3A_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "SIOCGSKNS not supported");
+		ASSERT_GE(netns_L3A_fd, 0);
+	}
+
+	/* Get namespace ID for net_L3A */
+	ret = ioctl(netns_L3A_fd, NS_GET_ID, &netns_L3A_id);
+	if (ret < 0) {
+		free(handle);
+		close(sock_L3A_fd);
+		close(netns_L3A_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "NS_GET_ID not supported");
+		ASSERT_EQ(ret, 0);
+	}
+
+	/* Get owner user namespace chain: userns_L3 -> userns_L2 -> userns_L1 */
+	int userns_L3_fd = ioctl(netns_L3A_fd, NS_GET_USERNS);
+	if (userns_L3_fd < 0) {
+		free(handle);
+		close(sock_L3A_fd);
+		close(netns_L3A_fd);
+		if (errno == ENOTTY || errno == EINVAL)
+			SKIP(return, "NS_GET_USERNS not supported");
+		ASSERT_GE(userns_L3_fd, 0);
+	}
+
+	ret = ioctl(userns_L3_fd, NS_GET_ID, &userns_L3_id);
+	ASSERT_EQ(ret, 0);
+
+	int userns_L2_fd = ioctl(userns_L3_fd, NS_GET_USERNS);
+	ASSERT_GE(userns_L2_fd, 0);
+	ret = ioctl(userns_L2_fd, NS_GET_ID, &userns_L2_id);
+	ASSERT_EQ(ret, 0);
+
+	int userns_L1_fd = ioctl(userns_L2_fd, NS_GET_USERNS);
+	ASSERT_GE(userns_L1_fd, 0);
+	ret = ioctl(userns_L1_fd, NS_GET_ID, &userns_L1_id);
+	ASSERT_EQ(ret, 0);
+
+	close(userns_L1_fd);
+	close(userns_L2_fd);
+	close(userns_L3_fd);
+
+	TH_LOG("Multi-level hierarchy: net_L3A (id=%llu) -> userns_L3 (id=%llu) -> userns_L2 (id=%llu) -> userns_L1 (id=%llu)",
+	       netns_L3A_id, userns_L3_id, userns_L2_id, userns_L1_id);
+
+	/*
+	 * Test 1: Verify net_L3A is visible in listns() after resurrection.
+	 * The entire ownership chain should be resurrected and visible.
+	 */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	if (ret < 0) {
+		free(handle);
+		close(sock_L3A_fd);
+		close(netns_L3A_fd);
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret, 0);
+	}
+
+	bool found_netns_L3A = false;
+	bool found_userns_L1 = false;
+	bool found_userns_L2 = false;
+	bool found_userns_L3 = false;
+
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == netns_L3A_id)
+			found_netns_L3A = true;
+		if (ns_ids[i] == userns_L1_id)
+			found_userns_L1 = true;
+		if (ns_ids[i] == userns_L2_id)
+			found_userns_L2 = true;
+		if (ns_ids[i] == userns_L3_id)
+			found_userns_L3 = true;
+	}
+
+	ASSERT_TRUE(found_netns_L3A);
+	ASSERT_TRUE(found_userns_L1);
+	ASSERT_TRUE(found_userns_L2);
+	ASSERT_TRUE(found_userns_L3);
+	TH_LOG("Resurrection verified: all namespaces in hierarchy visible in listns()");
+
+	/*
+	 * Test 2: Verify net_L3A can be reopened via file handle.
+	 */
+	nsfs_fh = (struct nsfs_file_handle *)handle->f_handle;
+	nsfs_fh->ns_id = netns_L3A_id;
+	nsfs_fh->ns_type = 0;
+	nsfs_fh->ns_inum = 0;
+
+	reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	if (reopened_fd < 0) {
+		free(handle);
+		close(sock_L3A_fd);
+		close(netns_L3A_fd);
+		if (errno == EOPNOTSUPP || errno == ENOSYS || errno == EBADF)
+			SKIP(return, "open_by_handle_at with FD_NSFS_ROOT not supported");
+		TH_LOG("open_by_handle_at failed: %s", strerror(errno));
+		ASSERT_GE(reopened_fd, 0);
+	}
+
+	close(reopened_fd);
+	TH_LOG("File handle test passed: net_L3A can be reopened");
+
+	/*
+	 * Test 3: Verify that when we close the netns FD (dropping the last
+	 * active reference), the entire tree becomes inactive and disappears
+	 * from listns(). The cascade goes: net_L3A drops -> userns_L3 drops ->
+	 * userns_L2 drops -> userns_L1 drops.
+	 */
+	close(netns_L3A_fd);
+
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	ASSERT_GE(ret, 0);
+
+	found_netns_L3A = false;
+	found_userns_L1 = false;
+	found_userns_L2 = false;
+	found_userns_L3 = false;
+
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == netns_L3A_id)
+			found_netns_L3A = true;
+		if (ns_ids[i] == userns_L1_id)
+			found_userns_L1 = true;
+		if (ns_ids[i] == userns_L2_id)
+			found_userns_L2 = true;
+		if (ns_ids[i] == userns_L3_id)
+			found_userns_L3 = true;
+	}
+
+	ASSERT_FALSE(found_netns_L3A);
+	ASSERT_FALSE(found_userns_L1);
+	ASSERT_FALSE(found_userns_L2);
+	ASSERT_FALSE(found_userns_L3);
+	TH_LOG("Cascade test passed: all namespaces disappeared after netns FD closed");
+
+	/*
+	 * Test 4: Verify file handle no longer works for inactive namespace.
+	 */
+	reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	if (reopened_fd >= 0) {
+		close(reopened_fd);
+		free(handle);
+		ASSERT_TRUE(false); /* Should have failed */
+	}
+	TH_LOG("Inactive namespace correctly cannot be reopened via file handle");
+
+	/*
+	 * Test 5: Verify that calling SIOCGSKNS again resurrects the tree again.
+	 * The socket is still valid, so we can call SIOCGSKNS on it to resurrect
+	 * the namespace tree once more.
+	 */
+	netns_L3A_fd = ioctl(sock_L3A_fd, SIOCGSKNS);
+	ASSERT_GE(netns_L3A_fd, 0);
+
+	TH_LOG("Called SIOCGSKNS again to resurrect the namespace tree");
+
+	/* Verify the namespace tree is resurrected and visible in listns() */
+	ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
+	ASSERT_GE(ret, 0);
+
+	found_netns_L3A = false;
+	found_userns_L1 = false;
+	found_userns_L2 = false;
+	found_userns_L3 = false;
+
+	for (i = 0; i < ret; i++) {
+		if (ns_ids[i] == netns_L3A_id)
+			found_netns_L3A = true;
+		if (ns_ids[i] == userns_L1_id)
+			found_userns_L1 = true;
+		if (ns_ids[i] == userns_L2_id)
+			found_userns_L2 = true;
+		if (ns_ids[i] == userns_L3_id)
+			found_userns_L3 = true;
+	}
+
+	ASSERT_TRUE(found_netns_L3A);
+	ASSERT_TRUE(found_userns_L1);
+	ASSERT_TRUE(found_userns_L2);
+	ASSERT_TRUE(found_userns_L3);
+	TH_LOG("Second resurrection verified: all namespaces in hierarchy visible in listns() again");
+
+	/* Verify we can reopen via file handle again */
+	reopened_fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+	if (reopened_fd < 0) {
+		free(handle);
+		close(sock_L3A_fd);
+		close(netns_L3A_fd);
+		TH_LOG("open_by_handle_at failed after second resurrection: %s", strerror(errno));
+		ASSERT_GE(reopened_fd, 0);
+	}
+
+	close(reopened_fd);
+	TH_LOG("File handle test passed: net_L3A can be reopened after second resurrection");
+
+	/* Final cleanup */
+	close(sock_L3A_fd);
+	close(netns_L3A_fd);
+	free(handle);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/stress_test.c b/tools/testing/selftests/namespaces/stress_test.c
new file mode 100644
index 000000000000..dd7df7d6cb27
--- /dev/null
+++ b/tools/testing/selftests/namespaces/stress_test.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/nsfs.h>
+#include "../kselftest_harness.h"
+#include "../filesystems/utils.h"
+#include "wrappers.h"
+
+/*
+ * Stress tests for namespace active reference counting.
+ *
+ * These tests validate that the active reference counting system can handle
+ * high load scenarios including rapid namespace creation/destruction, large
+ * numbers of concurrent namespaces, and various edge cases under stress.
+ */
+
+/*
+ * Test rapid creation and destruction of user namespaces.
+ * Create and destroy namespaces in quick succession to stress the
+ * active reference tracking and ensure no leaks occur.
+ */
+TEST(rapid_namespace_creation_destruction)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids_before[256], ns_ids_after[256];
+	ssize_t ret_before, ret_after;
+	int i;
+
+	/* Get baseline count of active user namespaces */
+	ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+	if (ret_before < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret_before, 0);
+	}
+
+	TH_LOG("Baseline: %zd active user namespaces", ret_before);
+
+	/* Rapidly create and destroy 100 user namespaces */
+	for (i = 0; i < 100; i++) {
+		pid_t pid = fork();
+		ASSERT_GE(pid, 0);
+
+		if (pid == 0) {
+			/* Child: create user namespace and immediately exit */
+			if (setup_userns() < 0)
+				exit(1);
+			exit(0);
+		}
+
+		/* Parent: reap the child; status is valid only if waitpid() succeeded */
+		int status;
+		ASSERT_EQ(waitpid(pid, &status, 0), pid);
+		ASSERT_TRUE(WIFEXITED(status));
+		ASSERT_EQ(WEXITSTATUS(status), 0);
+	}
+
+	/* Verify we're back to baseline (no leaked namespaces) */
+	ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+	ASSERT_GE(ret_after, 0);
+
+	TH_LOG("After 100 rapid create/destroy cycles: %zd active user namespaces", ret_after);
+	ASSERT_EQ(ret_before, ret_after);
+}
+
+/*
+ * Test creating many concurrent namespaces.
+ * Verify that listns() correctly tracks all of them and that they all
+ * become inactive after processes exit.
+ */
+TEST(many_concurrent_namespaces)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids_before[512], ns_ids_during[512], ns_ids_after[512];
+	ssize_t ret_before, ret_during, ret_after;
+	pid_t pids[50];
+	int num_children = ARRAY_SIZE(pids);
+	int i;
+	int sv[2];
+
+	/* Get baseline */
+	ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+	if (ret_before < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret_before, 0);
+	}
+
+	TH_LOG("Baseline: %zd active user namespaces", ret_before);
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+	/* Create many children, each with their own user namespace */
+	for (i = 0; i < num_children; i++) {
+		pids[i] = fork();
+		ASSERT_GE(pids[i], 0);
+
+		if (pids[i] == 0) {
+			/* Child: create user namespace and wait for parent signal */
+			char c = 'R';
+
+			close(sv[0]);
+
+			if (setup_userns() < 0) {
+				close(sv[1]);
+				exit(1);
+			}
+
+			/* Signal parent we're ready */
+			if (write(sv[1], &c, 1) != 1) {
+				close(sv[1]);
+				exit(1);
+			}
+
+			/* Wait for parent signal to exit */
+			if (read(sv[1], &c, 1) != 1) {
+				close(sv[1]);
+				exit(1);
+			}
+
+			close(sv[1]);
+			exit(0);
+		}
+	}
+
+	close(sv[1]);
+
+	/* Wait for all children to signal ready */
+	for (i = 0; i < num_children; i++) {
+		char c;
+		if (read(sv[0], &c, 1) != 1) {
+			/* If we fail to read, kill all children and exit */
+			close(sv[0]);
+			for (int j = 0; j < num_children; j++)
+				kill(pids[j], SIGKILL);
+			for (int j = 0; j < num_children; j++)
+				waitpid(pids[j], NULL, 0);
+			ASSERT_TRUE(false);
+		}
+	}
+
+	/* List namespaces while all children are running */
+	ret_during = sys_listns(&req, ns_ids_during, ARRAY_SIZE(ns_ids_during), 0);
+	ASSERT_GE(ret_during, 0);
+
+	TH_LOG("With %d children running: %zd active user namespaces", num_children, ret_during);
+
+	/* Should have at least num_children more namespaces than baseline */
+	ASSERT_GE(ret_during, ret_before + num_children);
+
+	/* Signal all children to exit */
+	for (i = 0; i < num_children; i++) {
+		char c = 'X';
+		if (write(sv[0], &c, 1) != 1) {
+			/* If we fail to write, kill remaining children */
+			close(sv[0]);
+			for (int j = i; j < num_children; j++)
+				kill(pids[j], SIGKILL);
+			for (int j = 0; j < num_children; j++)
+				waitpid(pids[j], NULL, 0);
+			ASSERT_TRUE(false);
+		}
+	}
+
+	close(sv[0]);
+
+	/* Reap all children; each one must have exited cleanly with status 0 */
+	for (i = 0; i < num_children; i++) {
+		int status;
+		ASSERT_EQ(waitpid(pids[i], &status, 0), pids[i]);
+		ASSERT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+	}
+
+	/* Verify we're back to baseline */
+	ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+	ASSERT_GE(ret_after, 0);
+
+	TH_LOG("After all children exit: %zd active user namespaces", ret_after);
+	ASSERT_EQ(ret_before, ret_after);
+}
+
+/*
+ * Test rapid namespace creation with different namespace types.
+ * Create multiple types of namespaces rapidly to stress the tracking system.
+ */
+TEST(rapid_mixed_namespace_creation)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = 0,  /* All types */
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids_before[512], ns_ids_after[512];
+	ssize_t ret_before, ret_after;
+	int i;
+
+	/* Get baseline count */
+	ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+	if (ret_before < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret_before, 0);
+	}
+
+	TH_LOG("Baseline: %zd active namespaces (all types)", ret_before);
+
+	/* Rapidly create and destroy namespaces with multiple types */
+	for (i = 0; i < 50; i++) {
+		pid_t pid = fork();
+		ASSERT_GE(pid, 0);
+
+		if (pid == 0) {
+			/* Child: create multiple namespace types */
+			if (setup_userns() < 0)
+				exit(1);
+
+			/* Create additional namespace types */
+			if (unshare(CLONE_NEWNET) < 0)
+				exit(1);
+			if (unshare(CLONE_NEWUTS) < 0)
+				exit(1);
+			if (unshare(CLONE_NEWIPC) < 0)
+				exit(1);
+
+			exit(0);
+		}
+
+		/* Parent: reap the child; only a normal exit is required, not code 0 */
+		int status;
+		ASSERT_EQ(waitpid(pid, &status, 0), pid);
+		ASSERT_TRUE(WIFEXITED(status));
+	}
+
+	/* Verify we're back to baseline */
+	ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+	ASSERT_GE(ret_after, 0);
+
+	TH_LOG("After 50 rapid mixed namespace cycles: %zd active namespaces", ret_after);
+	ASSERT_EQ(ret_before, ret_after);
+}
+
+/*
+ * Test nested namespace creation under stress.
+ * Create deeply nested namespace hierarchies and verify proper cleanup.
+ */
+TEST(nested_namespace_stress)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids_before[512], ns_ids_after[512];
+	ssize_t ret_before, ret_after;
+	int i;
+
+	/* Get baseline */
+	ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+	if (ret_before < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret_before, 0);
+	}
+
+	TH_LOG("Baseline: %zd active user namespaces", ret_before);
+
+	/* Create 20 processes, each with nested user namespaces */
+	for (i = 0; i < 20; i++) {
+		pid_t pid = fork();
+		ASSERT_GE(pid, 0);
+
+		if (pid == 0) {
+			int userns_fd;
+			uid_t orig_uid = getuid();
+			int depth;
+
+			/* Enter 5 user namespaces in turn, each created from the previous one */
+			for (depth = 0; depth < 5; depth++) {
+				userns_fd = get_userns_fd(0, (depth == 0) ? orig_uid : 0, 1);
+				if (userns_fd < 0)
+					exit(1);
+
+				if (setns(userns_fd, CLONE_NEWUSER) < 0) {
+					close(userns_fd);
+					exit(1);
+				}
+				close(userns_fd);
+			}
+
+			exit(0);
+		}
+
+		/* Parent: reap the child; only a normal exit is required, not code 0 */
+		int status;
+		ASSERT_EQ(waitpid(pid, &status, 0), pid);
+		ASSERT_TRUE(WIFEXITED(status));
+	}
+
+	/* Verify we're back to baseline */
+	ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+	ASSERT_GE(ret_after, 0);
+
+	TH_LOG("After 20 nested namespace hierarchies: %zd active user namespaces", ret_after);
+	ASSERT_EQ(ret_before, ret_after);
+}
+
+/*
+ * Test listns() pagination under stress.
+ * Create many namespaces and verify pagination works correctly.
+ */
+TEST(listns_pagination_stress)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	pid_t pids[30];
+	int num_children = ARRAY_SIZE(pids);
+	int i;
+	int sv[2];
+	__u64 all_ns_ids[512];
+	int total_found = 0;
+
+	ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
+
+	/* Create many children with user namespaces */
+	for (i = 0; i < num_children; i++) {
+		pids[i] = fork();
+		ASSERT_GE(pids[i], 0);
+
+		if (pids[i] == 0) {
+			char c = 'R';
+			close(sv[0]);
+
+			if (setup_userns() < 0) {
+				close(sv[1]);
+				exit(1);
+			}
+
+			/* Signal parent we're ready */
+			if (write(sv[1], &c, 1) != 1) {
+				close(sv[1]);
+				exit(1);
+			}
+
+			/* Wait for parent signal to exit */
+			if (read(sv[1], &c, 1) != 1) {
+				close(sv[1]);
+				exit(1);
+			}
+
+			close(sv[1]);
+			exit(0);
+		}
+	}
+
+	close(sv[1]);
+
+	/* Wait for all children to signal ready */
+	for (i = 0; i < num_children; i++) {
+		char c;
+		if (read(sv[0], &c, 1) != 1) {
+			/* If we fail to read, kill all children and exit */
+			close(sv[0]);
+			for (int j = 0; j < num_children; j++)
+				kill(pids[j], SIGKILL);
+			for (int j = 0; j < num_children; j++)
+				waitpid(pids[j], NULL, 0);
+			ASSERT_TRUE(false);
+		}
+	}
+
+	/* Paginate through all namespaces using small batch sizes */
+	req.ns_id = 0;
+	while (1) {
+		__u64 batch[5];  /* Small batch size to force pagination */
+		ssize_t ret;
+
+		ret = sys_listns(&req, batch, ARRAY_SIZE(batch), 0);
+		if (ret < 0) {
+			if (errno == ENOSYS) {
+				close(sv[0]);
+				for (i = 0; i < num_children; i++)
+					kill(pids[i], SIGKILL);
+				for (i = 0; i < num_children; i++)
+					waitpid(pids[i], NULL, 0);
+				SKIP(return, "listns() not supported");
+			}
+			ASSERT_GE(ret, 0);
+		}
+
+		if (ret == 0)
+			break;
+
+		/* Store results; ids beyond the capacity of all_ns_ids are dropped */
+		for (i = 0; i < ret && total_found < (int)ARRAY_SIZE(all_ns_ids); i++) {
+			all_ns_ids[total_found++] = batch[i];
+		}
+
+		/* A full batch may have a successor: resume after its last id */
+		if ((size_t)ret == ARRAY_SIZE(batch))
+			req.ns_id = batch[ret - 1];
+		else
+			break;
+	}
+
+	TH_LOG("Paginated through %d user namespaces", total_found);
+
+	/* Verify no duplicates in pagination */
+	for (i = 0; i < total_found; i++) {
+		for (int j = i + 1; j < total_found; j++) {
+			if (all_ns_ids[i] == all_ns_ids[j]) {
+				TH_LOG("Found duplicate ns_id: %llu at positions %d and %d",
+				       (unsigned long long)all_ns_ids[i], i, j);
+				ASSERT_TRUE(false);
+			}
+		}
+	}
+
+	/* Signal all children to exit */
+	for (i = 0; i < num_children; i++) {
+		char c = 'X';
+		if (write(sv[0], &c, 1) != 1) {
+			close(sv[0]);
+			for (int j = i; j < num_children; j++)
+				kill(pids[j], SIGKILL);
+			for (int j = 0; j < num_children; j++)
+				waitpid(pids[j], NULL, 0);
+			ASSERT_TRUE(false);
+		}
+	}
+
+	close(sv[0]);
+
+	/* Wait for all children */
+	for (i = 0; i < num_children; i++) {
+		/* Only reap here; the children's exit status is not examined */
+		waitpid(pids[i], NULL, 0);
+	}
+}
+
+/*
+ * Test concurrent namespace operations: forked workers create user namespaces
+ * and call listns() in parallel; afterwards the count must match the baseline.
+ */
+TEST(concurrent_namespace_operations)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = 0,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids_before[512], ns_ids_after[512];
+	ssize_t ret_before, ret_after;
+	pid_t pids[20];
+	int num_workers = 20;
+	int i;
+
+	/* Get baseline */
+	ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+	if (ret_before < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret_before, 0);
+	}
+
+	TH_LOG("Baseline: %zd active namespaces", ret_before);
+
+	/* Create worker processes that do concurrent operations */
+	for (i = 0; i < num_workers; i++) {
+		pids[i] = fork();
+		ASSERT_GE(pids[i], 0);
+
+		if (pids[i] == 0) {
+			/* Each worker: create namespaces, list them, repeat */
+			int iterations;
+
+			for (iterations = 0; iterations < 10; iterations++) {
+				int userns_fd;
+				__u64 temp_ns_ids[100];
+				ssize_t ret;
+
+				/* Create a user namespace */
+				userns_fd = get_userns_fd(0, getuid(), 1);
+				if (userns_fd < 0)
+					continue;
+
+				/* List namespaces */
+				ret = sys_listns(&req, temp_ns_ids, ARRAY_SIZE(temp_ns_ids), 0);
+				(void)ret;
+
+				close(userns_fd);
+
+				/* Small delay */
+				usleep(1000);
+			}
+
+			exit(0);
+		}
+	}
+
+	/* Reap every worker; status is only meaningful if waitpid() succeeded */
+	for (i = 0; i < num_workers; i++) {
+		int status;
+		ASSERT_EQ(waitpid(pids[i], &status, 0), pids[i]);
+		ASSERT_TRUE(WIFEXITED(status));
+		ASSERT_EQ(WEXITSTATUS(status), 0);
+	}
+
+	/* Verify we're back to baseline */
+	ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+	ASSERT_GE(ret_after, 0);
+
+	TH_LOG("After concurrent operations: %zd active namespaces", ret_after);
+	ASSERT_EQ(ret_before, ret_after);
+}
+
+/*
+ * Test namespace churn - continuous creation and destruction.
+ * Simulates high-churn scenarios like container orchestration.
+ */
+TEST(namespace_churn)
+{
+	struct ns_id_req req = {
+		.size = sizeof(req),
+		.spare = 0,
+		.ns_id = 0,
+		.ns_type = CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS,
+		.spare2 = 0,
+		.user_ns_id = 0,
+	};
+	__u64 ns_ids_before[512], ns_ids_after[512];
+	ssize_t ret_before, ret_after;
+	int cycle;
+
+	/* Get baseline */
+	ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
+	if (ret_before < 0) {
+		if (errno == ENOSYS)
+			SKIP(return, "listns() not supported");
+		ASSERT_GE(ret_before, 0);
+	}
+
+	TH_LOG("Baseline: %zd active namespaces", ret_before);
+
+	/* Simulate churn: 10 cycles, each forking a batch of 10 children */
+	for (cycle = 0; cycle < 10; cycle++) {
+		pid_t batch_pids[10];
+		int i;
+
+		/* Create batch */
+		for (i = 0; i < 10; i++) {
+			batch_pids[i] = fork();
+			ASSERT_GE(batch_pids[i], 0);
+
+			if (batch_pids[i] == 0) {
+				/* Create multiple namespace types */
+				if (setup_userns() < 0)
+					exit(1);
+				if (unshare(CLONE_NEWNET) < 0)
+					exit(1);
+				if (unshare(CLONE_NEWUTS) < 0)
+					exit(1);
+
+				/* Keep namespaces alive briefly */
+				usleep(10000);
+				exit(0);
+			}
+		}
+
+		/* Reap the whole batch; the children's exit status is not checked */
+		for (i = 0; i < 10; i++) {
+			int status;
+			waitpid(batch_pids[i], &status, 0);
+		}
+	}
+
+	/* Every child has been waited for; the count must equal the baseline */
+	ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
+	ASSERT_GE(ret_after, 0);
+
+	TH_LOG("After 10 churn cycles (100 namespace sets): %zd active namespaces", ret_after);
+	ASSERT_EQ(ret_before, ret_after);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/wrappers.h b/tools/testing/selftests/namespaces/wrappers.h
new file mode 100644
index 000000000000..9741a64a5b1d
--- /dev/null
+++ b/tools/testing/selftests/namespaces/wrappers.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/nsfs.h>
+#include <linux/types.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#ifndef __SELFTESTS_NAMESPACES_WRAPPERS_H__
+#define __SELFTESTS_NAMESPACES_WRAPPERS_H__
+
+#ifndef __NR_listns
+	#if defined __alpha__
+		#define __NR_listns 580
+	#elif defined _MIPS_SIM
+		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
+			#define __NR_listns 4470
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
+			#define __NR_listns 6470
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
+			#define __NR_listns 5470
+		#endif
+	#else
+		#define __NR_listns 470
+	#endif
+#endif
+
+static inline int sys_listns(const struct ns_id_req *req, __u64 *ns_ids,
+			     size_t nr_ns_ids, unsigned int flags)
+{
+	return syscall(__NR_listns, req, ns_ids, nr_ns_ids, flags);
+}
+
+#endif /* __SELFTESTS_NAMESPACES_WRAPPERS_H__ */
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 439101b518ee..6930fe926c58 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -4,10 +4,8 @@ bind_timewait
 bind_wildcard
 busy_poller
 cmsg_sender
-diag_uid
 epoll_busy_poll
 fin_ack_lat
-gro
 hwtstamp_config
 io_uring_zerocopy_tx
 ioam6_parser
@@ -18,7 +16,6 @@ ipv6_flowlabel
 ipv6_flowlabel_mgr
 ipv6_fragmentation
 log.txt
-msg_oob
 msg_zerocopy
 netlink-dumps
 nettest
@@ -35,9 +32,6 @@ reuseport_bpf_numa
 reuseport_dualstack
 rxtimestamp
 sctp_hello
-scm_inq
-scm_pidfd
-scm_rights
 sk_bind_sendto_listen
 sk_connect_zero_addr
 sk_so_peek_off
@@ -56,7 +50,6 @@ tcp_port_share
 tfo
 timestamping
 tls
-toeplitz
 tools
 tun
 txring_overwrite
@@ -64,4 +57,3 @@ txtimestamp
 udpgso
 udpgso_bench_rx
 udpgso_bench_tx
-unix_connect
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index b5127e968108..b66ba04f19d9 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -38,7 +38,6 @@ TEST_PROGS := \
 	fq_band_pktlimit.sh \
 	gre_gso.sh \
 	gre_ipv6_lladdr.sh \
-	gro.sh \
 	icmp.sh \
 	icmp_redirect.sh \
 	io_uring_zerocopy_tx.sh \
@@ -121,8 +120,6 @@ TEST_PROGS := \
 # end of TEST_PROGS
 
 TEST_PROGS_EXTENDED := \
-	toeplitz.sh \
-	toeplitz_client.sh \
 	xfrm_policy_add_speed.sh \
 # end of TEST_PROGS_EXTENDED
 
@@ -130,7 +127,6 @@ TEST_GEN_FILES := \
 	bind_bhash \
 	cmsg_sender \
 	fin_ack_lat \
-	gro \
 	hwtstamp_config \
 	io_uring_zerocopy_tx \
 	ioam6_parser \
@@ -159,7 +155,6 @@ TEST_GEN_FILES := \
 	tcp_mmap \
 	tfo \
 	timestamping \
-	toeplitz \
 	txring_overwrite \
 	txtimestamp \
 	udpgso \
@@ -193,8 +188,6 @@ TEST_FILES := \
 	in_netns.sh \
 	lib.sh \
 	settings \
-	setup_loopback.sh \
-	setup_veth.sh \
 # end of TEST_FILES
 
 # YNL files, must be before "include ..lib.mk"
diff --git a/tools/testing/selftests/net/af_unix/.gitignore b/tools/testing/selftests/net/af_unix/.gitignore
new file mode 100644
index 000000000000..240b26740c9e
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/.gitignore
@@ -0,0 +1,8 @@
+diag_uid
+msg_oob
+scm_inq
+scm_pidfd
+scm_rights
+so_peek_off
+unix_connect
+unix_connreset
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index de805cbbdf69..3cd677b72072 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -6,7 +6,9 @@ TEST_GEN_PROGS := \
 	scm_inq \
 	scm_pidfd \
 	scm_rights \
+	so_peek_off \
 	unix_connect \
+	unix_connreset \
 # end of TEST_GEN_PROGS
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/so_peek_off.c b/tools/testing/selftests/net/af_unix/so_peek_off.c
new file mode 100644
index 000000000000..86e7b0fb522d
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/so_peek_off.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(so_peek_off)
+{
+	int fd[2];	/* 0: sender, 1: receiver */
+};
+
+FIXTURE_VARIANT(so_peek_off)
+{
+	int type;
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, stream)
+{
+	.type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, dgram)
+{
+	.type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, seqpacket)
+{
+	.type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(so_peek_off)
+{
+	struct timeval timeout = {
+		.tv_sec = 5,	/* receive timeout applied to fd[1] below */
+		.tv_usec = 0,
+	};
+	int ret;
+
+	ret = socketpair(AF_UNIX, variant->type, 0, self->fd);
+	ASSERT_EQ(0, ret);
+	/* SO_RCVTIMEO (not _NEW) selects the variant matching struct timeval. */
+	ret = setsockopt(self->fd[1], SOL_SOCKET, SO_RCVTIMEO,
+			 &timeout, sizeof(timeout));
+	ASSERT_EQ(0, ret);
+	/* Set the receiver's SO_PEEK_OFF to 0. */
+	ret = setsockopt(self->fd[1], SOL_SOCKET, SO_PEEK_OFF,
+			 &(int){0}, sizeof(int));
+	ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(so_peek_off)
+{
+	close_range(self->fd[0], self->fd[1], 0);
+}
+
+#define sendeq(fd, str, flags)					\
+	do {							\
+		int bytes, len = strlen(str);			\
+								\
+		bytes = send(fd, str, len, flags);		\
+		ASSERT_EQ(len, bytes);				\
+	} while (0)
+
+#define recveq(fd, str, buflen, flags)				\
+	do {							\
+		char buf[(buflen) + 1] = {};			\
+		int bytes;					\
+								\
+		bytes = recv(fd, buf, buflen, flags);		\
+		ASSERT_NE(-1, bytes);				\
+		ASSERT_STREQ(str, buf);				\
+	} while (0)
+
+#define async							\
+	for (pid_t pid = (pid = fork(),				\
+			  pid < 0 ?				\
+			  __TH_LOG("Failed to start async {}"),	\
+			  _metadata->exit_code = KSFT_FAIL,	\
+			  __bail(1, _metadata),			\
+			  0xdead :				\
+			  pid);					\
+	     !pid; exit(0))
+
+TEST_F(so_peek_off, single_chunk)
+{
+	sendeq(self->fd[0], "aaaabbbb", 0);
+
+	recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+	recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks)
+{
+	sendeq(self->fd[0], "aaaa", 0);
+	sendeq(self->fd[0], "bbbb", 0);
+
+	recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+	recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks_blocking)
+{
+	async {
+		usleep(1000);
+		sendeq(self->fd[0], "aaaa", 0);
+	}
+
+	recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+
+	async {
+		usleep(1000);
+		sendeq(self->fd[0], "bbbb", 0);
+	}
+
+	/* goto again; -> goto redo; in unix_stream_read_generic(). */
+	recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks_overlap)
+{
+	sendeq(self->fd[0], "aaaa", 0);
+	recveq(self->fd[1], "aa", 2, MSG_PEEK);
+
+	sendeq(self->fd[0], "bbbb", 0);
+
+	if (variant->type == SOCK_STREAM) {
+		/* SOCK_STREAM tries to fill the buffer. */
+		recveq(self->fd[1], "aabb", 4, MSG_PEEK);
+		recveq(self->fd[1], "bb", 100, MSG_PEEK);
+	} else {
+		/* SOCK_DGRAM and SOCK_SEQPACKET return at the skb boundary. */
+		recveq(self->fd[1], "aa", 100, MSG_PEEK);
+		recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+	}
+}
+
+TEST_F(so_peek_off, two_chunks_overlap_blocking)
+{
+	async {
+		usleep(1000);
+		sendeq(self->fd[0], "aaaa", 0);
+	}
+
+	recveq(self->fd[1], "aa", 2, MSG_PEEK);
+
+	async {
+		usleep(1000);
+		sendeq(self->fd[0], "bbbb", 0);
+	}
+
+	/* Even SOCK_STREAM does not wait if at least one byte is read. */
+	recveq(self->fd[1], "aa", 100, MSG_PEEK);
+
+	recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/unix_connreset.c b/tools/testing/selftests/net/af_unix/unix_connreset.c
new file mode 100644
index 000000000000..08c1de8f5a98
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/unix_connreset.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Selftest for AF_UNIX socket close and ECONNRESET behaviour.
+ *
+ * This test verifies:
+ *  1. SOCK_STREAM returns EOF when the peer closes normally.
+ *  2. SOCK_STREAM returns ECONNRESET if peer closes with unread data.
+ *  3. SOCK_SEQPACKET returns EOF when the peer closes normally.
+ *  4. SOCK_SEQPACKET returns ECONNRESET if the peer closes with unread data.
+ *  5. SOCK_DGRAM does not return ECONNRESET when the peer closes.
+ *
+ * These tests document the intended Linux behaviour.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include "../../kselftest_harness.h"
+
+#define SOCK_PATH "/tmp/af_unix_connreset.sock"
+
+static void remove_socket_file(void)
+{
+	unlink(SOCK_PATH);
+}
+
+FIXTURE(unix_sock)
+{
+	int server;
+	int client;
+	int child;
+};
+
+FIXTURE_VARIANT(unix_sock)
+{
+	int socket_type;
+	const char *name;
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, stream) {
+	.socket_type = SOCK_STREAM,
+	.name = "SOCK_STREAM",
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, dgram) {
+	.socket_type = SOCK_DGRAM,
+	.name = "SOCK_DGRAM",
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, seqpacket) {
+	.socket_type = SOCK_SEQPACKET,
+	.name = "SOCK_SEQPACKET",
+};
+
+FIXTURE_SETUP(unix_sock)
+{
+	struct sockaddr_un addr = {};
+	int err;
+
+	addr.sun_family = AF_UNIX;
+	strcpy(addr.sun_path, SOCK_PATH);
+	remove_socket_file();
+
+	self->server = socket(AF_UNIX, variant->socket_type, 0);
+	ASSERT_LT(-1, self->server);
+
+	err = bind(self->server, (struct sockaddr *)&addr, sizeof(addr));
+	ASSERT_EQ(0, err);
+
+	if (variant->socket_type == SOCK_STREAM ||
+	    variant->socket_type == SOCK_SEQPACKET) {
+		err = listen(self->server, 1);
+		ASSERT_EQ(0, err);
+	}
+
+	self->client = socket(AF_UNIX, variant->socket_type | SOCK_NONBLOCK, 0);
+	ASSERT_LT(-1, self->client);
+
+	err = connect(self->client, (struct sockaddr *)&addr, sizeof(addr));
+	ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(unix_sock)
+{
+	/* child is 0 if no test accept()ed (zeroed fixture, TODO confirm); spare fd 0 */
+	if (self->child > 0)
+		close(self->child);
+
+	close(self->client);
+	close(self->server);
+	remove_socket_file();
+}
+
+/* Test 1: peer closes normally */
+TEST_F(unix_sock, eof)
+{
+	char buf[16] = {};
+	ssize_t n;
+
+	if (variant->socket_type == SOCK_STREAM ||
+	    variant->socket_type == SOCK_SEQPACKET) {
+		self->child = accept(self->server, NULL, NULL);
+		ASSERT_LT(-1, self->child);
+
+		close(self->child);
+	} else {
+		close(self->server);
+	}
+
+	n = recv(self->client, buf, sizeof(buf), 0);
+
+	if (variant->socket_type == SOCK_STREAM ||
+	    variant->socket_type == SOCK_SEQPACKET) {
+		ASSERT_EQ(0, n);
+	} else {
+		ASSERT_EQ(-1, n);
+		ASSERT_EQ(EAGAIN, errno);
+	}
+}
+
+/* Test 2: peer closes with unread data */
+TEST_F(unix_sock, reset_unread_behavior)
+{
+	char buf[16] = {};
+	ssize_t n;
+
+	/* Queue data the peer never reads; the checks below rely on it being sent */
+	ASSERT_EQ(5, send(self->client, "hello", 5, 0));
+
+	if (variant->socket_type == SOCK_DGRAM) {
+		/* No real connection, just close the server */
+		close(self->server);
+	} else {
+		self->child = accept(self->server, NULL, NULL);
+		ASSERT_LT(-1, self->child);
+
+		/* Peer closes before client reads */
+		close(self->child);
+	}
+
+	n = recv(self->client, buf, sizeof(buf), 0);
+	ASSERT_EQ(-1, n);
+
+	if (variant->socket_type == SOCK_STREAM ||
+	    variant->socket_type == SOCK_SEQPACKET) {
+		ASSERT_EQ(ECONNRESET, errno);
+	} else {
+		ASSERT_EQ(EAGAIN, errno);
+	}
+}
+
+/* Test 3: closing unaccepted (embryo) server socket should reset client. */
+TEST_F(unix_sock, reset_closed_embryo)
+{
+	char buf[16] = {};
+	ssize_t n;
+
+	if (variant->socket_type == SOCK_DGRAM) {
+		snprintf(_metadata->results->reason,
+			 sizeof(_metadata->results->reason),
+			 "Test only applies to SOCK_STREAM and SOCK_SEQPACKET");
+		exit(KSFT_XFAIL);
+	}
+
+	/* Close server without accept()ing */
+	close(self->server);
+
+	n = recv(self->client, buf, sizeof(buf), 0);
+
+	ASSERT_EQ(-1, n);
+	ASSERT_EQ(ECONNRESET, errno);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
index 92eb880c52f2..00758f00efbf 100755
--- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
+++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
@@ -75,7 +75,7 @@ setup_v4() {
     ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
     if [ $? -ne 0 ]; then
         cleanup_v4
-        echo "failed"
+        echo "failed; is the system using MACAddressPolicy=persistent ?"
         exit 1
     fi
 
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index 4046131e7888..d9e5b967f815 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 # Test various bareudp tunnel configurations.
diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh
index 7d2d40812074..5ec1c85c1623 100755
--- a/tools/testing/selftests/net/busy_poll_test.sh
+++ b/tools/testing/selftests/net/busy_poll_test.sh
@@ -27,6 +27,8 @@ NAPI_DEFER_HARD_IRQS=100
 GRO_FLUSH_TIMEOUT=50000
 SUSPEND_TIMEOUT=20000000
 
+NAPI_THREADED_MODE_BUSY_POLL=2
+
 setup_ns()
 {
 	set -e
@@ -62,6 +64,9 @@ cleanup_ns()
 test_busypoll()
 {
 	suspend_value=${1:-0}
+	napi_threaded_value=${2:-0}
+	prefer_busy_poll_value=${3:-$PREFER_BUSY_POLL}
+
 	tmp_file=$(mktemp)
 	out_file=$(mktemp)
 
@@ -73,10 +78,11 @@ test_busypoll()
 					     -b${SERVER_IP}        \
 					     -m${MAX_EVENTS}       \
 					     -u${BUSY_POLL_USECS}  \
-					     -P${PREFER_BUSY_POLL} \
+					     -P${prefer_busy_poll_value} \
 					     -g${BUSY_POLL_BUDGET} \
 					     -i${NSIM_SV_IFIDX}    \
 					     -s${suspend_value}    \
+					     -t${napi_threaded_value} \
 					     -o${out_file}&
 
 	wait_local_port_listen nssv ${SERVER_PORT} tcp
@@ -109,6 +115,15 @@ test_busypoll_with_suspend()
 	return $?
 }
 
+test_busypoll_with_napi_threaded()
+{
+	# Only enable napi threaded poll. Set suspend timeout and prefer busy
+	# poll to 0.
+	test_busypoll 0 ${NAPI_THREADED_MODE_BUSY_POLL} 0
+
+	return $?
+}
+
 ###
 ### Code start
 ###
@@ -154,6 +169,13 @@ if [ $? -ne 0 ]; then
 	exit 1
 fi
 
+test_busypoll_with_napi_threaded
+if [ $? -ne 0 ]; then
+	echo "test_busypoll_with_napi_threaded failed"
+	cleanup_ns
+	exit 1
+fi
+
 echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
 
 echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c
index 04c7ff577bb8..3a81f9c94795 100644
--- a/tools/testing/selftests/net/busy_poller.c
+++ b/tools/testing/selftests/net/busy_poller.c
@@ -65,15 +65,16 @@ static uint32_t cfg_busy_poll_usecs;
 static uint16_t cfg_busy_poll_budget;
 static uint8_t cfg_prefer_busy_poll;
 
-/* IRQ params */
+/* NAPI params */
 static uint32_t cfg_defer_hard_irqs;
 static uint64_t cfg_gro_flush_timeout;
 static uint64_t cfg_irq_suspend_timeout;
+static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED;
 
 static void usage(const char *filepath)
 {
 	error(1, 0,
-	      "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>",
+	      "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -t<napi_threaded_poll> -i<ifindex>",
 	      filepath);
 }
 
@@ -86,7 +87,7 @@ static void parse_opts(int argc, char **argv)
 	if (argc <= 1)
 		usage(argv[0]);
 
-	while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
+	while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) {
 		/* most options take integer values, except o and b, so reduce
 		 * code duplication a bit for the common case by calling
 		 * strtoull here and leave bounds checking and casting per
@@ -168,6 +169,12 @@ static void parse_opts(int argc, char **argv)
 
 			cfg_ifindex = (int)tmp;
 			break;
+		case 't':
+			if (tmp > 2)
+				error(1, ERANGE, "napi threaded poll value must be 0-2");
+
+			cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp;
+			break;
 		}
 	}
 
@@ -247,6 +254,9 @@ static void setup_queue(void)
 	netdev_napi_set_req_set_irq_suspend_timeout(set_req,
 						    cfg_irq_suspend_timeout);
 
+	if (cfg_napi_threaded_poll)
+		netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll);
+
 	if (netdev_napi_set(ys, set_req))
 		error(1, 0, "can't set NAPI params: %s\n", yerr.msg);
 
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index a94b73a53f72..a88f797c549a 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -11,7 +11,8 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
        ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
        ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
        ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
-       ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance"
+       ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \
+       fib6_ra_to_static"
 
 VERBOSE=0
 PAUSE_ON_FAIL=no
@@ -1476,6 +1477,68 @@ ipv6_route_metrics_test()
 	route_cleanup
 }
 
+fib6_ra_to_static()
+{
+	setup
+
+	echo
+	echo "Fib6 route promotion from RA-learned to static test"
+	set -e
+
+	# ra6 is required for the test. (ipv6toolkit)
+	if [ ! -x "$(command -v ra6)" ]; then
+	    echo "SKIP: ra6 not found."
+	    set +e
+	    cleanup &> /dev/null
+	    return
+	fi
+
+	# Create a pair of veth devices to send a RA message from one
+	# device to another.
+	$IP link add veth1 type veth peer name veth2
+	$IP link set dev veth1 up
+	$IP link set dev veth2 up
+	$IP -6 address add 2001:10::1/64 dev veth1 nodad
+	$IP -6 address add 2001:10::2/64 dev veth2 nodad
+
+	# Make veth1 ready to receive RA messages.
+	$NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2
+
+	# Send a RA message with a prefix from veth2.
+	$NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60
+
+	# Wait for the RA message.
+	sleep 1
+
+	# systemd may mess up the test. Make sure that
+	# systemd-networkd.service and systemd-networkd.socket are stopped.
+	check_rt_num_clean 2 $($IP -6 route list|grep expires|wc -l) || return
+
+	# Configure static address on the same prefix
+	$IP -6 address add 2001:12::dead/64 dev veth1 nodad
+
+	# On-link route won't expire anymore, default route still owned by RA
+	check_rt_num 1 $($IP -6 route list |grep expires|wc -l)
+
+	# Send a second RA message with a prefix from veth2.
+	$NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60
+	sleep 1
+
+	# Expire is not back, on-link route is still static
+	check_rt_num 1 $($IP -6 route list |grep expires|wc -l)
+
+	$IP -6 address del 2001:12::dead/64 dev veth1 nodad
+
+	# Expire is back, on-link route is now owned by RA again
+	check_rt_num 2 $($IP -6 route list |grep expires|wc -l)
+
+	log_test $ret 0 "ipv6 promote RA route to static"
+
+	set +e
+
+	cleanup &> /dev/null
+}
+
 # add route for a prefix, flushing any existing routes first
 # expected to be the first step of a test
 add_route()
@@ -2798,6 +2861,7 @@ do
 	ipv6_mpath_list)		ipv6_mpath_list_test;;
 	ipv4_mpath_balance)		ipv4_mpath_balance_test;;
 	ipv6_mpath_balance)		ipv6_mpath_balance_test;;
+	fib6_ra_to_static)		fib6_ra_to_static;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index 8c1597ebc2d3..e86d77946585 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -28,6 +28,7 @@ ALL_TESTS="
 	cfg_test
 	fwd_test
 	ctrl_test
+	disable_test
 "
 
 NUM_NETIFS=4
@@ -64,7 +65,10 @@ h2_destroy()
 
 switch_create()
 {
-	ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+	local vlan_filtering=$1; shift
+
+	ip link add name br0 type bridge \
+		vlan_filtering "$vlan_filtering" vlan_default_pvid 0 \
 		mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2
 	bridge vlan add vid 10 dev br0 self
 	bridge vlan add vid 20 dev br0 self
@@ -118,7 +122,7 @@ setup_prepare()
 
 	h1_create
 	h2_create
-	switch_create
+	switch_create 1
 }
 
 cleanup()
@@ -1357,6 +1361,98 @@ ctrl_test()
 	ctrl_mldv2_is_in_test
 }
 
+check_group()
+{
+	local group=$1; shift
+	local vid=$1; shift
+	local should_fail=$1; shift
+	local when=$1; shift
+	local -a vidkws
+
+	if ((vid)); then
+		vidkws=(vid "$vid")
+	fi
+
+	bridge mdb get dev br0 grp "$group" "${vidkws[@]}" 2>/dev/null |
+		grep -q "port $swp1"
+	check_err_fail "$should_fail" $? "$group seen $when snooping disable:"
+}
+
+__disable_test()
+{
+	local vid=$1; shift
+	local what=$1; shift
+	local -a vidkws
+
+	if ((vid)); then
+		vidkws=(vid "$vid")
+	fi
+
+	RET=0
+
+	bridge mdb add dev br0 port "$swp1" grp ff0e::1 permanent \
+		"${vidkws[@]}" filter_mode include source_list 2001:db8:1::1
+	bridge mdb add dev br0 port "$swp1" grp ff0e::2 permanent \
+		"${vidkws[@]}" filter_mode exclude
+
+	bridge mdb add dev br0 port "$swp1" grp ff0e::3 \
+		"${vidkws[@]}" filter_mode include source_list 2001:db8:1::2
+	bridge mdb add dev br0 port "$swp1" grp ff0e::4 \
+		"${vidkws[@]}" filter_mode exclude
+
+	bridge mdb add dev br0 port "$swp1" grp 239.1.1.1 permanent \
+		"${vidkws[@]}" filter_mode include source_list 192.0.2.1
+	bridge mdb add dev br0 port "$swp1" grp 239.1.1.2 permanent \
+		"${vidkws[@]}" filter_mode exclude
+
+	bridge mdb add dev br0 port "$swp1" grp 239.1.1.3 \
+		"${vidkws[@]}" filter_mode include source_list 192.0.2.2
+	bridge mdb add dev br0 port "$swp1" grp 239.1.1.4 \
+		"${vidkws[@]}" filter_mode exclude
+
+	check_group ff0e::1 "$vid" 0 "before"
+	check_group ff0e::2 "$vid" 0 "before"
+	check_group ff0e::3 "$vid" 0 "before"
+	check_group ff0e::4 "$vid" 0 "before"
+
+	check_group 239.1.1.1 "$vid" 0 "before"
+	check_group 239.1.1.2 "$vid" 0 "before"
+	check_group 239.1.1.3 "$vid" 0 "before"
+	check_group 239.1.1.4 "$vid" 0 "before"
+
+	ip link set dev br0 type bridge mcast_snooping 0
+
+	check_group ff0e::1 "$vid" 0 "after"
+	check_group ff0e::2 "$vid" 0 "after"
+	check_group ff0e::3 "$vid" 1 "after"
+	check_group ff0e::4 "$vid" 1 "after"
+
+	check_group 239.1.1.1 "$vid" 0 "after"
+	check_group 239.1.1.2 "$vid" 0 "after"
+	check_group 239.1.1.3 "$vid" 1 "after"
+	check_group 239.1.1.4 "$vid" 1 "after"
+
+	log_test "$what: Flush after disable"
+
+	ip link set dev br0 type bridge mcast_snooping 1
+	sleep 10
+}
+
+disable_test()
+{
+	__disable_test 10 802.1q
+
+	switch_destroy
+	switch_create 0
+	setup_wait
+
+	__disable_test 0 802.1d
+
+	switch_destroy
+	switch_create 1
+	setup_wait
+}
+
 if ! bridge mdb help 2>&1 | grep -q "flush"; then
 	echo "SKIP: iproute2 too old, missing bridge mdb flush support"
 	exit $ksft_skip
diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
index ff2accccaf4d..b4eda6c6199e 100755
--- a/tools/testing/selftests/net/forwarding/lib_sh_test.sh
+++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
@@ -30,6 +30,11 @@ tfail()
 	do_test "tfail" false
 }
 
+tfail2()
+{
+	do_test "tfail2" false
+}
+
 txfail()
 {
 	FAIL_TO_XFAIL=yes do_test "txfail" false
@@ -132,6 +137,8 @@ test_ret()
 	ret_subtest $ksft_fail "tfail" txfail tfail
 
 	ret_subtest $ksft_xfail "txfail" txfail txfail
+
+	ret_subtest $ksft_fail "tfail2" tfail2 tfail
 }
 
 exit_status_tests_run()
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index ecd34f364125..892895659c7e 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -176,6 +176,8 @@ run_test()
 	local rcv_dmac=$(mac_get $rcv_if_name)
 	local should_receive
 
+	setup_wait
+
 	tcpdump_start $rcv_if_name
 
 	mc_route_prepare $send_if_name
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
deleted file mode 100755
index 4c5144c6f652..000000000000
--- a/tools/testing/selftests/net/gro.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly SERVER_MAC="aa:00:00:00:00:02"
-readonly CLIENT_MAC="aa:00:00:00:00:01"
-readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
-readonly PROTOS=("ipv4" "ipv6" "ipip")
-dev=""
-test="all"
-proto="ipv4"
-
-run_test() {
-  local server_pid=0
-  local exit_code=0
-  local protocol=$1
-  local test=$2
-  local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
-  "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
-
-  setup_ns
-  # Each test is run 6 times to deflake, because given the receive timing,
-  # not all packets that should coalesce will be considered in the same flow
-  # on every try.
-  for tries in {1..6}; do
-    # Actual test starts here
-    ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
-      1>>log.txt &
-    server_pid=$!
-    sleep 0.5  # to allow for socket init
-    ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \
-      1>>log.txt
-    wait "${server_pid}"
-    exit_code=$?
-    if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \
-          ${exit_code} -ne 0 ]]; then
-        echo "Ignoring errors due to slow environment" 1>&2
-        exit_code=0
-    fi
-    if [[ "${exit_code}" -eq 0 ]]; then
-        break;
-    fi
-  done
-  cleanup_ns
-  echo ${exit_code}
-}
-
-run_all_tests() {
-  local failed_tests=()
-  for proto in "${PROTOS[@]}"; do
-    for test in "${TESTS[@]}"; do
-      echo "running test ${proto} ${test}" >&2
-      exit_code=$(run_test $proto $test)
-      if [[ "${exit_code}" -ne 0 ]]; then
-        failed_tests+=("${proto}_${test}")
-      fi;
-    done;
-  done
-  if [[ ${#failed_tests[@]} -ne 0 ]]; then
-    echo "failed tests: ${failed_tests[*]}. \
-    Please see log.txt for more logs"
-    exit 1
-  else
-    echo "All Tests Succeeded!"
-  fi;
-}
-
-usage() {
-  echo "Usage: $0 \
-  [-i <DEV>] \
-  [-t data|ack|flags|tcp|ip|large] \
-  [-p <ipv4|ipv6>]" 1>&2;
-  exit 1;
-}
-
-while getopts "i:t:p:" opt; do
-  case "${opt}" in
-    i)
-      dev="${OPTARG}"
-      ;;
-    t)
-      test="${OPTARG}"
-      ;;
-    p)
-      proto="${OPTARG}"
-      ;;
-    *)
-      usage
-      ;;
-  esac
-done
-
-if [ -n "$dev" ]; then
-	source setup_loopback.sh
-else
-	source setup_veth.sh
-fi
-
-setup
-trap cleanup EXIT
-if [[ "${test}" == "all" ]]; then
-  run_all_tests
-else
-  exit_code=$(run_test "${proto}" "${test}")
-  exit $exit_code
-fi;
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
index 76e604e4810e..7bfeeb133705 100644
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -106,14 +106,14 @@ static void do_tx(int domain, int type, int protocol)
 
 	ret = io_uring_queue_init(512, &ring, 0);
 	if (ret)
-		error(1, ret, "io_uring: queue init");
+		error(1, -ret, "io_uring: queue init");
 
 	iov.iov_base = payload;
 	iov.iov_len = cfg_payload_len;
 
 	ret = io_uring_register_buffers(&ring, &iov, 1);
 	if (ret)
-		error(1, ret, "io_uring: buffer registration");
+		error(1, -ret, "io_uring: buffer registration");
 
 	tstop = gettimeofday_ms() + cfg_runtime_ms;
 	do {
@@ -149,24 +149,24 @@ static void do_tx(int domain, int type, int protocol)
 
 		ret = io_uring_submit(&ring);
 		if (ret != cfg_nr_reqs)
-			error(1, ret, "submit");
+			error(1, -ret, "submit");
 
 		if (cfg_cork)
 			do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
 		for (i = 0; i < cfg_nr_reqs; i++) {
 			ret = io_uring_wait_cqe(&ring, &cqe);
 			if (ret)
-				error(1, ret, "wait cqe");
+				error(1, -ret, "wait cqe");
 
 			if (cqe->user_data != NONZC_TAG &&
 			    cqe->user_data != ZC_TAG)
-				error(1, -EINVAL, "invalid cqe->user_data");
+				error(1, EINVAL, "invalid cqe->user_data");
 
 			if (cqe->flags & IORING_CQE_F_NOTIF) {
 				if (cqe->flags & IORING_CQE_F_MORE)
-					error(1, -EINVAL, "invalid notif flags");
+					error(1, EINVAL, "invalid notif flags");
 				if (compl_cqes <= 0)
-					error(1, -EINVAL, "notification mismatch");
+					error(1, EINVAL, "notification mismatch");
 				compl_cqes--;
 				i--;
 				io_uring_cqe_seen(&ring);
@@ -174,14 +174,14 @@ static void do_tx(int domain, int type, int protocol)
 			}
 			if (cqe->flags & IORING_CQE_F_MORE) {
 				if (cqe->user_data != ZC_TAG)
-					error(1, cqe->res, "unexpected F_MORE");
+					error(1, -cqe->res, "unexpected F_MORE");
 				compl_cqes++;
 			}
 			if (cqe->res >= 0) {
 				packets++;
 				bytes += cqe->res;
 			} else if (cqe->res != -EAGAIN) {
-				error(1, cqe->res, "send failed");
+				error(1, -cqe->res, "send failed");
 			}
 			io_uring_cqe_seen(&ring);
 		}
@@ -190,11 +190,11 @@ static void do_tx(int domain, int type, int protocol)
 	while (compl_cqes) {
 		ret = io_uring_wait_cqe(&ring, &cqe);
 		if (ret)
-			error(1, ret, "wait cqe");
+			error(1, -ret, "wait cqe");
 		if (cqe->flags & IORING_CQE_F_MORE)
-			error(1, -EINVAL, "invalid notif flags");
+			error(1, EINVAL, "invalid notif flags");
 		if (!(cqe->flags & IORING_CQE_F_NOTIF))
-			error(1, -EINVAL, "missing notif flag");
+			error(1, EINVAL, "missing notif flag");
 
 		io_uring_cqe_seen(&ring);
 		compl_cqes--;
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index feba4ef69a54..f448bafb3f20 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -43,7 +43,7 @@ __ksft_status_merge()
 		weights[$i]=$((weight++))
 	done
 
-	if [[ ${weights[$a]} > ${weights[$b]} ]]; then
+	if [[ ${weights[$a]} -ge ${weights[$b]} ]]; then
 		echo "$a"
 		return 0
 	else
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index ce795bc0a1af..5339f56329e1 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -8,6 +8,7 @@ CFLAGS += -I../../
 TEST_FILES := \
 	../../../../net/ynl \
 	../../../../../Documentation/netlink/specs \
+	ksft_setup_loopback.sh \
 # end of TEST_FILES
 
 TEST_GEN_FILES := \
diff --git a/tools/testing/selftests/net/lib/ksft_setup_loopback.sh b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh
new file mode 100755
index 000000000000..3defbb1919c5
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Setup script for running ksft tests over a real interface in loopback mode.
+# This script replaces the historical setup_loopback.sh. It puts
+# a (presumably) real hardware interface into loopback mode, creates macvlan
+# interfaces on top and places them in a network namespace for isolation.
+#
+# NETIF env variable must be exported to indicate the real target device.
+# Note that this script will override NETIF with one of the macvlans; the
+# actual ksft test will only see the macvlans.
+#
+# Example use:
+#   export NETIF=eth0
+#   ./net/lib/ksft_setup_loopback.sh ./drivers/net/gro.py
+
+if [ -z "$NETIF" ]; then
+    echo "Error: NETIF variable not set"
+    exit 1
+fi
+if ! [ -d "/sys/class/net/$NETIF" ]; then
+    echo "Error: Can't find $NETIF, invalid netdevice"
+    exit 1
+fi
+
+# Save original settings for cleanup
+readonly FLUSH_PATH="/sys/class/net/${NETIF}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${NETIF}/napi_defer_hard_irqs"
+FLUSH_TIMEOUT="$(< "${FLUSH_PATH}")"
+readonly FLUSH_TIMEOUT
+HARD_IRQS="$(< "${IRQ_PATH}")"
+readonly HARD_IRQS
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+readonly SERVER_NS
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+readonly CLIENT_NS
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+# ksft expects addresses to communicate with remote
+export  LOCAL_V6=2001:db8:1::1
+export REMOTE_V6=2001:db8:1::2
+
+cleanup() {
+    local exit_code=$?
+
+    echo "Cleaning up..."
+
+    # Remove macvlan interfaces and namespaces
+    ip -netns "${SERVER_NS}" link del dev server 2>/dev/null || true
+    ip netns del "${SERVER_NS}" 2>/dev/null || true
+    ip -netns "${CLIENT_NS}" link del dev client 2>/dev/null || true
+    ip netns del "${CLIENT_NS}" 2>/dev/null || true
+
+    # Disable loopback
+    ethtool -K "${NETIF}" loopback off 2>/dev/null || true
+    sleep 1
+
+    echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+    echo "${HARD_IRQS}" >"${IRQ_PATH}"
+
+    exit $exit_code
+}
+
+trap cleanup EXIT INT TERM
+
+# Enable loopback mode
+echo "Enabling loopback on ${NETIF}..."
+ethtool -K "${NETIF}" loopback on || {
+    echo "Failed to enable loopback mode"
+    exit 1
+}
+# The interface may need time to get carrier back, but selftests
+# will wait for carrier, so no need to wait / sleep here.
+
+# Use a timer on the host to trigger the network stack.
+# Also disable device interrupts to not depend on NIC interrupts.
+# This reduces test flakiness caused by unexpected interrupts.
+echo 100000 >"${FLUSH_PATH}"
+echo 50 >"${IRQ_PATH}"
+
+# Create server namespace with macvlan
+ip netns add "${SERVER_NS}"
+ip link add link "${NETIF}" dev server address "${SERVER_MAC}" type macvlan
+ip link set dev server netns "${SERVER_NS}"
+ip -netns "${SERVER_NS}" link set dev server up
+ip -netns "${SERVER_NS}" addr add $LOCAL_V6/64 dev server
+ip -netns "${SERVER_NS}" link set dev lo up
+
+# Create client namespace with macvlan
+ip netns add "${CLIENT_NS}"
+ip link add link "${NETIF}" dev client address "${CLIENT_MAC}" type macvlan
+ip link set dev client netns "${CLIENT_NS}"
+ip -netns "${CLIENT_NS}" link set dev client up
+ip -netns "${CLIENT_NS}" addr add $REMOTE_V6/64 dev client
+ip -netns "${CLIENT_NS}" link set dev lo up
+
+echo "Setup complete!"
+echo "  Device: ${NETIF}"
+echo "  Server NS: ${SERVER_NS}"
+echo "  Client NS: ${CLIENT_NS}"
+echo ""
+
+# Setup environment variables for tests
+export NETIF=server
+export REMOTE_TYPE=netns
+export REMOTE_ARGS="${CLIENT_NS}"
+
+# Run the command
+ip netns exec "${SERVER_NS}" "$@"
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 997b85cc216a..40f9ce307dd1 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -1,9 +1,33 @@
 # SPDX-License-Identifier: GPL-2.0
 
+"""
+Python selftest helpers for netdev.
+"""
+
 from .consts import KSRC
-from .ksft import *
+from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \
+    ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \
+    ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \
+    ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit, \
+    ksft_variants, KsftNamedVariant
 from .netns import NetNS, NetNSEnter
-from .nsim import *
-from .utils import *
+from .nsim import NetdevSim, NetdevSimDev
+from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \
+    bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file
 from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
 from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily
+
+__all__ = ["KSRC",
+           "KsftFailEx", "KsftSkipEx", "KsftXfailEx", "ksft_pr", "ksft_eq",
+           "ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in",
+           "ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises",
+           "ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup",
+           "ksft_run", "ksft_exit", "ksft_variants", "KsftNamedVariant",
+           "NetNS", "NetNSEnter",
+           "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer",
+           "bpftool", "ip", "ethtool", "bpftrace", "rand_port",
+           "wait_port_listen", "wait_file",
+           "NetdevSim", "NetdevSimDev",
+           "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError",
+           "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily",
+           "RtnlAddrFamily"]
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 83b1574f7719..531e7fa1b3ea 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -1,12 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
 
-import builtins
 import functools
 import inspect
 import signal
 import sys
 import time
 import traceback
+from collections import namedtuple
 from .consts import KSFT_MAIN_NAME
 from .utils import global_defer_queue
 
@@ -136,7 +136,7 @@ def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""):
         time.sleep(sleep)
 
 
-def ktap_result(ok, cnt=1, case="", comment=""):
+def ktap_result(ok, cnt=1, case_name="", comment=""):
     global KSFT_RESULT_ALL
     KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok
 
@@ -146,8 +146,8 @@ def ktap_result(ok, cnt=1, case="", comment=""):
     res += "ok "
     res += str(cnt) + " "
     res += KSFT_MAIN_NAME
-    if case:
-        res += "." + str(case.__name__)
+    if case_name:
+        res += "." + case_name
     if comment:
         res += " # " + comment
     print(res, flush=True)
@@ -163,7 +163,7 @@ def ksft_flush_defer():
         entry = global_defer_queue.pop()
         try:
             entry.exec_only()
-        except:
+        except Exception:
             ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!")
             tb = traceback.format_exc()
             for line in tb.strip().split('\n'):
@@ -171,6 +171,10 @@ def ksft_flush_defer():
             KSFT_RESULT = False
 
 
+KsftCaseFunction = namedtuple("KsftCaseFunction",
+                              ['name', 'original_func', 'variants'])
+
+
 def ksft_disruptive(func):
     """
     Decorator that marks the test as disruptive (e.g. the test
@@ -181,11 +185,47 @@ def ksft_disruptive(func):
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
         if not KSFT_DISRUPTIVE:
-            raise KsftSkipEx(f"marked as disruptive")
+            raise KsftSkipEx("marked as disruptive")
         return func(*args, **kwargs)
     return wrapper
 
 
+class KsftNamedVariant:
+    """ A named variant: name string + argument tuple for @ksft_variants """
+
+    def __init__(self, name, *params):
+        self.params = params
+        self.name = name or "_".join([str(x) for x in self.params])
+
+
+def ksft_variants(params):
+    """
+    Decorator defining the sets of inputs for a test.
+    The parameters will be included in the name of the resulting sub-case.
+    Each entry can be either a single object, a tuple or a KsftNamedVariant.
+    The argument can be a list or a generator.
+
+    Example:
+
+    @ksft_variants([
+        (1, "a"),
+        (2, "b"),
+        KsftNamedVariant("three", 3, "c"),
+    ])
+    def my_case(cfg, a, b):
+        pass # ...
+
+    ksft_run(cases=[my_case], args=(cfg, ))
+
+    Will generate cases:
+        my_case.1_a
+        my_case.2_b
+        my_case.three
+    """
+
+    return lambda func: KsftCaseFunction(func.__name__, func, params)
+
+
 def ksft_setup(env):
     """
     Setup test framework global state from the environment.
@@ -199,7 +239,7 @@ def ksft_setup(env):
             return False
         try:
             return bool(int(value))
-        except:
+        except Exception:
             raise Exception(f"failed to parse {name}")
 
     if "DISRUPTIVE" in env:
@@ -220,9 +260,13 @@ def _ksft_intr(signum, frame):
         ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...")
 
 
-def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+def _ksft_generate_test_cases(cases, globs, case_pfx, args):
+    """Generate a flat list of (func, args, name) tuples"""
+
     cases = cases or []
+    test_cases = []
 
+    # If using the globs method find all relevant functions
     if globs and case_pfx:
         for key, value in globs.items():
             if not callable(value):
@@ -232,6 +276,27 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
                     cases.append(value)
                     break
 
+    for func in cases:
+        if isinstance(func, KsftCaseFunction):
+            # Parametrized test - create case for each param
+            for param in func.variants:
+                if not isinstance(param, KsftNamedVariant):
+                    if not isinstance(param, tuple):
+                        param = (param, )
+                    param = KsftNamedVariant(None, *param)
+
+                test_cases.append((func.original_func,
+                                   (*args, *param.params),
+                                   func.name + "." + param.name))
+        else:
+            test_cases.append((func, args, func.__name__))
+
+    return test_cases
+
+
+def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+    test_cases = _ksft_generate_test_cases(cases, globs, case_pfx, args)
+
     global term_cnt
     term_cnt = 0
     prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr)
@@ -239,19 +304,19 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
     totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
 
     print("TAP version 13", flush=True)
-    print("1.." + str(len(cases)), flush=True)
+    print("1.." + str(len(test_cases)), flush=True)
 
     global KSFT_RESULT
     cnt = 0
     stop = False
-    for case in cases:
+    for func, args, name in test_cases:
         KSFT_RESULT = True
         cnt += 1
         comment = ""
         cnt_key = ""
 
         try:
-            case(*args)
+            func(*args)
         except KsftSkipEx as e:
             comment = "SKIP " + str(e)
             cnt_key = 'skip'
@@ -268,12 +333,26 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
             KSFT_RESULT = False
             cnt_key = 'fail'
 
-        ksft_flush_defer()
+        try:
+            ksft_flush_defer()
+        except BaseException as e:
+            tb = traceback.format_exc()
+            for line in tb.strip().split('\n'):
+                ksft_pr("Exception|", line)
+            if isinstance(e, KeyboardInterrupt):
+                ksft_pr()
+                ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.")
+                ksft_pr("      Attempting to finish cleanup before exiting.")
+                ksft_pr("      Interrupt again to exit immediately.")
+                ksft_pr()
+                stop = True
+            # Flush was interrupted, try to finish the job best we can
+            ksft_flush_defer()
 
         if not cnt_key:
             cnt_key = 'pass' if KSFT_RESULT else 'fail'
 
-        ktap_result(KSFT_RESULT, cnt, case, comment=comment)
+        ktap_result(KSFT_RESULT, cnt, name, comment=comment)
         totals[cnt_key] += 1
 
         if stop:
diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py
index 1a8cbe9acc48..7c640ed64c0b 100644
--- a/tools/testing/selftests/net/lib/py/nsim.py
+++ b/tools/testing/selftests/net/lib/py/nsim.py
@@ -27,7 +27,7 @@ class NetdevSim:
         self.port_index = port_index
         self.ns = ns
         self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
-        ret = ip("-j link show dev %s" % ifname, ns=ns)
+        ret = ip("-d -j link show dev %s" % ifname, ns=ns)
         self.dev = json.loads(ret.stdout)[0]
         self.ifindex = self.dev["ifindex"]
 
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index cb40ecef9456..106ee1f2df86 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -32,7 +32,7 @@ class cmd:
     Use bkg() instead to run a command in the background.
     """
     def __init__(self, comm, shell=None, fail=True, ns=None, background=False,
-                 host=None, timeout=5, ksft_wait=None):
+                 host=None, timeout=5, ksft_ready=None, ksft_wait=None):
         if ns:
             comm = f'ip netns exec {ns} ' + comm
 
@@ -52,21 +52,25 @@ class cmd:
             # ksft_wait lets us wait for the background process to fully start,
             # we pass an FD to the child process, and wait for it to write back.
             # Similarly term_fd tells child it's time to exit.
-            pass_fds = ()
+            pass_fds = []
             env = os.environ.copy()
             if ksft_wait is not None:
-                rfd, ready_fd = os.pipe()
                 wait_fd, self.ksft_term_fd = os.pipe()
-                pass_fds = (ready_fd, wait_fd, )
-                env["KSFT_READY_FD"] = str(ready_fd)
+                pass_fds.append(wait_fd)
                 env["KSFT_WAIT_FD"]  = str(wait_fd)
+                ksft_ready = True  # ksft_wait implies ready
+            if ksft_ready is not None:
+                rfd, ready_fd = os.pipe()
+                pass_fds.append(ready_fd)
+                env["KSFT_READY_FD"] = str(ready_fd)
 
             self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE, pass_fds=pass_fds,
                                          env=env)
             if ksft_wait is not None:
-                os.close(ready_fd)
                 os.close(wait_fd)
+            if ksft_ready is not None:
+                os.close(ready_fd)
                 msg = fd_read_timeout(rfd, ksft_wait)
                 os.close(rfd)
                 if not msg:
@@ -116,10 +120,10 @@ class bkg(cmd):
         with bkg("my_binary", ksft_wait=5):
     """
     def __init__(self, comm, shell=None, fail=None, ns=None, host=None,
-                 exit_wait=False, ksft_wait=None):
+                 exit_wait=False, ksft_ready=None, ksft_wait=None):
         super().__init__(comm, background=True,
                          shell=shell, fail=fail, ns=ns, host=host,
-                         ksft_wait=ksft_wait)
+                         ksft_ready=ksft_ready, ksft_wait=ksft_wait)
         self.terminate = not exit_wait and not ksft_wait
         self._exit_wait = exit_wait
         self.check_fail = fail
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
index c368fc045f4b..64f05229ab24 100644
--- a/tools/testing/selftests/net/lib/xdp_native.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -332,7 +332,7 @@ static __u16 csum_fold_helper(__u32 csum)
 }
 
 static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
-				     __u32 hdr_len)
+				     unsigned long hdr_len)
 {
 	char tmp_buff[MAX_ADJST_OFFSET];
 	__u32 buff_pos, udp_csum = 0;
@@ -422,8 +422,9 @@ static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port)
 {
 	struct udphdr *udph = NULL;
 	__s32 *adjust_offset, *val;
-	__u32 key, hdr_len;
+	unsigned long hdr_len;
 	void *offset_ptr;
+	__u32 key;
 	__u8 tag;
 	int ret;
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index b148cadb96d0..404a77bf366a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -710,8 +710,14 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
 
 				bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len);
 				if (bw < 0) {
-					if (cfg_rcv_trunc)
-						return 0;
+					/* expected reset, continue to read */
+					if (cfg_rcv_trunc &&
+					    (errno == ECONNRESET ||
+					     errno == EPIPE)) {
+						fds.events &= ~POLLOUT;
+						continue;
+					}
+
 					perror("write");
 					return 111;
 				}
@@ -737,8 +743,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
 		}
 
 		if (fds.revents & (POLLERR | POLLNVAL)) {
-			if (cfg_rcv_trunc)
-				return 0;
+			if (cfg_rcv_trunc) {
+				fds.events &= ~(POLLERR | POLLNVAL);
+				continue;
+			}
 			fprintf(stderr, "Unexpected revents: "
 				"POLLERR/POLLNVAL(%x)\n", fds.revents);
 			return 5;
@@ -1064,6 +1072,8 @@ static void check_getpeername_connect(int fd)
 	socklen_t salen = sizeof(ss);
 	char a[INET6_ADDRSTRLEN];
 	char b[INET6_ADDRSTRLEN];
+	const char *iface;
+	size_t len;
 
 	if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) {
 		perror("getpeername");
@@ -1073,7 +1083,13 @@ static void check_getpeername_connect(int fd)
 	xgetnameinfo((struct sockaddr *)&ss, salen,
 		     a, sizeof(a), b, sizeof(b));
 
-	if (strcmp(cfg_host, a) || strcmp(cfg_port, b))
+	iface = strchr(cfg_host, '%');
+	if (iface)
+		len = iface - cfg_host;
+	else
+		len = strlen(cfg_host) + 1;
+
+	if (strncmp(cfg_host, a, len) || strcmp(cfg_port, b))
 		fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__,
 			cfg_host, a, cfg_port, b);
 }
@@ -1433,7 +1449,7 @@ static void parse_opts(int argc, char **argv)
 			 */
 			if (cfg_truncate < 0) {
 				cfg_rcv_trunc = true;
-				signal(SIGPIPE, handle_signal);
+				signal(SIGPIPE, SIG_IGN);
 			}
 			break;
 		case 'j':
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 47ecb5b3836e..a6447f7a31fe 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -375,81 +375,75 @@ do_transfer()
 		local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
 		local capopt="-i any -s 65535 -B 32768 ${capuser}"
 
-		ip netns exec ${listener_ns}  tcpdump ${capopt} -w "${capfile}-listener.pcap"  >> "${capout}" 2>&1 &
+		ip netns exec ${listener_ns} tcpdump ${capopt} \
+			-w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
 		local cappid_listener=$!
 
-		ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
-		local cappid_connector=$!
+		if [ ${listener_ns} != ${connector_ns} ]; then
+			ip netns exec ${connector_ns} tcpdump ${capopt} \
+				-w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+			local cappid_connector=$!
+		fi
 
 		sleep 1
 	fi
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat -n
+	mptcp_lib_nstat_init "${listener_ns}"
 	if [ ${listener_ns} != ${connector_ns} ]; then
-		NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-			nstat -n
-	fi
-
-	local stat_synrx_last_l
-	local stat_ackrx_last_l
-	local stat_cookietx_last
-	local stat_cookierx_last
-	local stat_csum_err_s
-	local stat_csum_err_c
-	local stat_tcpfb_last_l
-	stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
-	stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
-	stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
-	stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
-	stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
-	stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
-	stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
-
-	timeout ${timeout_test} \
-		ip netns exec ${listener_ns} \
-			./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-				$extra_args $local_addr < "$sin" > "$sout" &
+		mptcp_lib_nstat_init "${connector_ns}"
+	fi
+
+	ip netns exec ${listener_ns} \
+		./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+			$extra_args $local_addr < "$sin" > "$sout" &
 	local spid=$!
 
 	mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
 
 	local start
 	start=$(date +%s%3N)
-	timeout ${timeout_test} \
-		ip netns exec ${connector_ns} \
-			./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
-				$extra_args $connect_addr < "$cin" > "$cout" &
+	ip netns exec ${connector_ns} \
+		./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+			$extra_args $connect_addr < "$cin" > "$cout" &
 	local cpid=$!
 
+	mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+		"${connector_ns}" "${port}" "${cpid}" "${spid}" &
+	local timeout_pid=$!
+
 	wait $cpid
 	local retc=$?
 	wait $spid
 	local rets=$?
 
+	if kill -0 $timeout_pid; then
+		# Finished before the timeout: kill the background job
+		mptcp_lib_kill_group_wait $timeout_pid
+		timeout_pid=0
+	fi
+
 	local stop
 	stop=$(date +%s%3N)
 
 	if $capture; then
 		sleep 1
 		kill ${cappid_listener}
-		kill ${cappid_connector}
+		if [ ${listener_ns} != ${connector_ns} ]; then
+			kill ${cappid_connector}
+		fi
 	fi
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat | grep Tcp > /tmp/${listener_ns}.out
+	mptcp_lib_nstat_get "${listener_ns}"
 	if [ ${listener_ns} != ${connector_ns} ]; then
-		NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-			nstat | grep Tcp > /tmp/${connector_ns}.out
+		mptcp_lib_nstat_get "${connector_ns}"
 	fi
 
 	local duration
 	duration=$((stop-start))
 	printf "(duration %05sms) " "${duration}"
-	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
 		mptcp_lib_pr_fail "client exit code $retc, server $rets"
-		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
-			"/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
+		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
 
 		echo
 		cat "$capout"
@@ -463,38 +457,38 @@ do_transfer()
 	rets=$?
 
 	local extra=""
-	local stat_synrx_now_l
-	local stat_ackrx_now_l
-	local stat_cookietx_now
-	local stat_cookierx_now
-	local stat_ooo_now
-	local stat_tcpfb_now_l
-	stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
-	stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
-	stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
-	stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
-	stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
-	stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
-
-	expect_synrx=$((stat_synrx_last_l))
-	expect_ackrx=$((stat_ackrx_last_l))
+	local stat_synrx
+	local stat_ackrx
+	local stat_cookietx
+	local stat_cookierx
+	local stat_ooo
+	local stat_tcpfb
+	stat_synrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+	stat_ackrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+	stat_cookietx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+	stat_cookierx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+	stat_ooo=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+	stat_tcpfb=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
+
+	expect_synrx=0
+	expect_ackrx=0
 
 	cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies)
 	cookies=${cookies##*=}
 
 	if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
-		expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
-		expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
+		expect_synrx=${connect_per_transfer}
+		expect_ackrx=${connect_per_transfer}
 	fi
 
-	if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
-		mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \
+	if [ ${stat_synrx} -lt ${expect_synrx} ]; then
+		mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx})" \
 				  "than expected (${expect_synrx})"
 		retc=1
 	fi
-	if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
-		if [ ${stat_ooo_now} -eq 0 ]; then
-			mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \
+	if [ ${stat_ackrx} -lt ${expect_ackrx} ]; then
+		if [ ${stat_ooo} -eq 0 ]; then
+			mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx})" \
 					  "than expected (${expect_ackrx})"
 			rets=1
 		else
@@ -508,47 +502,45 @@ do_transfer()
 		csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
 		csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
 
-		local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
-		if [ $csum_err_s_nr -gt 0 ]; then
-			mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]"
+		if [ $csum_err_s -gt 0 ]; then
+			mptcp_lib_pr_fail "server got ${csum_err_s} data checksum error[s]"
 			rets=1
 		fi
 
-		local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
-		if [ $csum_err_c_nr -gt 0 ]; then
-			mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]"
+		if [ $csum_err_c -gt 0 ]; then
+			mptcp_lib_pr_fail "client got ${csum_err_c} data checksum error[s]"
 			retc=1
 		fi
 	fi
 
-	if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+	if [ ${stat_ooo} -eq 0 ] && [ ${stat_tcpfb} -gt 0 ]; then
 		mptcp_lib_pr_fail "unexpected fallback to TCP"
 		rets=1
 	fi
 
 	if [ $cookies -eq 2 ];then
-		if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
+		if [ $stat_cookietx -eq 0 ] ;then
 			extra+=" WARN: CookieSent: did not advance"
 		fi
-		if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
+		if [ $stat_cookierx -eq 0 ] ;then
 			extra+=" WARN: CookieRecv: did not advance"
 		fi
 	else
-		if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
+		if [ $stat_cookietx -gt 0 ] ;then
 			extra+=" WARN: CookieSent: changed"
 		fi
-		if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
+		if [ $stat_cookierx -gt 0 ] ;then
 			extra+=" WARN: CookieRecv: changed"
 		fi
 	fi
 
-	if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
+	if [ ${stat_synrx} -gt ${expect_synrx} ]; then
 		extra+=" WARN: SYNRX: expect ${expect_synrx},"
-		extra+=" got ${stat_synrx_now_l} (probably retransmissions)"
+		extra+=" got ${stat_synrx} (probably retransmissions)"
 	fi
-	if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
+	if [ ${stat_ackrx} -gt ${expect_ackrx} ]; then
 		extra+=" WARN: ACKRX: expect ${expect_ackrx},"
-		extra+=" got ${stat_ackrx_now_l} (probably retransmissions)"
+		extra+=" got ${stat_ackrx} (probably retransmissions)"
 	fi
 
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index c90d8e8b95cb..b2e6e548f796 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -62,6 +62,7 @@ unset sflags
 unset fastclose
 unset fullmesh
 unset speed
+unset bind_addr
 unset join_syn_rej
 unset join_csum_ns1
 unset join_csum_ns2
@@ -645,6 +646,27 @@ wait_mpj()
 	done
 }
 
+wait_ll_ready()
+{
+	local ns="${1}"
+
+	local i
+	for i in $(seq 50); do
+		ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" |
+			grep -qw "tentative" || break
+		sleep 0.1
+	done
+}
+
+get_ll_addr()
+{
+	local ns="${1}"
+	local iface="${2}"
+
+	ip -n "${ns}" -6 addr show dev "${iface}" scope link |
+		grep "inet6 fe80" | sed 's#.*\(fe80::.*\)/.*#\1#'
+}
+
 kill_events_pids()
 {
 	mptcp_lib_kill_wait $evts_ns1_pid
@@ -951,6 +973,9 @@ do_transfer()
 	local FAILING_LINKS=${FAILING_LINKS:-""}
 	local fastclose=${fastclose:-""}
 	local speed=${speed:-"fast"}
+	local bind_addr=${bind_addr:-"::"}
+	local listener_in="${sin}"
+	local connector_in="${cin}"
 	port=$(get_port)
 
 	:> "$cout"
@@ -958,10 +983,8 @@ do_transfer()
 
 	cond_start_capture ${listener_ns}
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat -n
-	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-		nstat -n
+	mptcp_lib_nstat_init "${listener_ns}"
+	mptcp_lib_nstat_init "${connector_ns}"
 
 	local extra_args
 	if [ $speed = "fast" ]; then
@@ -999,42 +1022,40 @@ do_transfer()
 
 	extra_srv_args="$extra_args $extra_srv_args"
 	if [ "$test_linkfail" -gt 1 ];then
-		timeout ${timeout_test} \
-			ip netns exec ${listener_ns} \
-				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-					$extra_srv_args "::" < "$sinfail" > "$sout" &
-	else
-		timeout ${timeout_test} \
-			ip netns exec ${listener_ns} \
-				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-					$extra_srv_args "::" < "$sin" > "$sout" &
+		listener_in="${sinfail}"
 	fi
+	ip netns exec ${listener_ns} \
+		./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \
+			${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" &
 	local spid=$!
 
 	mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
 
 	extra_cl_args="$extra_args $extra_cl_args"
 	if [ "$test_linkfail" -eq 0 ];then
-		timeout ${timeout_test} \
-			ip netns exec ${connector_ns} \
-				./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
-					$extra_cl_args $connect_addr < "$cin" > "$cout" &
+		ip netns exec ${connector_ns} \
+			./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+				$extra_cl_args $connect_addr < "$cin" > "$cout" &
 	elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then
+		connector_in="${cinsent}"
 		( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
 			tee "$cinsent" | \
-			timeout ${timeout_test} \
 				ip netns exec ${connector_ns} \
 					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
 						$extra_cl_args $connect_addr > "$cout" &
 	else
+		connector_in="${cinsent}"
 		tee "$cinsent" < "$cinfail" | \
-			timeout ${timeout_test} \
-				ip netns exec ${connector_ns} \
-					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
-						$extra_cl_args $connect_addr > "$cout" &
+			ip netns exec ${connector_ns} \
+				./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+					$extra_cl_args $connect_addr > "$cout" &
 	fi
 	local cpid=$!
 
+	mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+		"${connector_ns}" "${port}" "${cpid}" "${spid}" &
+	local timeout_pid=$!
+
 	pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr
 	check_cestab $listener_ns $connector_ns
 
@@ -1043,31 +1064,26 @@ do_transfer()
 	wait $spid
 	local rets=$?
 
+	if kill -0 $timeout_pid; then
+		# Finished before the timeout: kill the background job
+		mptcp_lib_kill_group_wait $timeout_pid
+		timeout_pid=0
+	fi
+
 	cond_stop_capture
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat | grep Tcp > /tmp/${listener_ns}.out
-	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-		nstat | grep Tcp > /tmp/${connector_ns}.out
+	mptcp_lib_nstat_get "${listener_ns}"
+	mptcp_lib_nstat_get "${connector_ns}"
 
-	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
 		fail_test "client exit code $retc, server $rets"
-		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
-			"/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
+		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
 		return 1
 	fi
 
-	if [ "$test_linkfail" -gt 1 ];then
-		check_transfer $sinfail $cout "file received by client" $trunc_size
-	else
-		check_transfer $sin $cout "file received by client" $trunc_size
-	fi
+	check_transfer $listener_in $cout "file received by client" $trunc_size
 	retc=$?
-	if [ "$test_linkfail" -eq 0 ];then
-		check_transfer $cin $sout "file received by server" $trunc_size
-	else
-		check_transfer $cinsent $sout "file received by server" $trunc_size
-	fi
+	check_transfer $connector_in $sout "file received by server" $trunc_size
 	rets=$?
 
 	[ $retc -eq 0 ] && [ $rets -eq 0 ]
@@ -1136,12 +1152,20 @@ run_tests()
 	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
 }
 
+_dump_stats()
+{
+	local ns="${1}" # netns whose counters are printed
+	local side="${2}" # label for the header, e.g. "Server" or "Client"
+
+	mptcp_lib_print_err "${side} ns stats (${ns})"
+	mptcp_lib_pr_nstat "${ns}"
+	echo
+}
+
 dump_stats()
 {
-	echo Server ns stats
-	ip netns exec $ns1 nstat -as | grep Tcp
-	echo Client ns stats
-	ip netns exec $ns2 nstat -as | grep Tcp
+	_dump_stats "${ns1}" "Server"
+	_dump_stats "${ns2}" "Client"
 }
 
 chk_csum_nr()
@@ -2324,7 +2348,7 @@ laminar_endp_tests()
 {
 	# no laminar endpoints: routing rules are used
 	if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT &&
-	   mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -2336,7 +2360,7 @@ laminar_endp_tests()
 
 	# laminar endpoints: this endpoint is used
 	if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT &&
-	   mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -2348,7 +2372,7 @@ laminar_endp_tests()
 
 	# laminar endpoints: these endpoints are used
 	if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT &&
-	   mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -2363,7 +2387,7 @@ laminar_endp_tests()
 
 	# laminar endpoints: only one endpoint is used
 	if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT &&
-	   mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -2376,7 +2400,7 @@ laminar_endp_tests()
 
 	# laminar endpoints: subflow and laminar flags
 	if reset_with_tcp_filter "sublow + laminar endpoints" ns1 10.0.2.2 REJECT &&
-	   mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
 		pm_nl_set_limits $ns1 0 4
 		pm_nl_set_limits $ns2 2 4
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
@@ -2532,7 +2556,7 @@ remove_tests()
 	if reset "remove single subflow"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
 		addr_nr_ns2=-1 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 1 1 1
@@ -2545,8 +2569,8 @@ remove_tests()
 	if reset "remove multiple subflows"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 0 2
-		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
 		addr_nr_ns2=-2 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 2 2 2
@@ -2557,7 +2581,7 @@ remove_tests()
 	# single address, remove
 	if reset "remove single address"; then
 		pm_nl_set_limits $ns1 0 1
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
 		pm_nl_set_limits $ns2 1 1
 		addr_nr_ns1=-1 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
@@ -2570,9 +2594,9 @@ remove_tests()
 	# subflow and signal, remove
 	if reset "remove subflow and signal"; then
 		pm_nl_set_limits $ns1 0 2
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
 		pm_nl_set_limits $ns2 1 2
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
 		addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 2 2 2
@@ -2584,10 +2608,10 @@ remove_tests()
 	# subflows and signal, remove
 	if reset "remove subflows and signal"; then
 		pm_nl_set_limits $ns1 0 3
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
 		pm_nl_set_limits $ns2 1 3
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
 		addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 3 3 3
@@ -2599,9 +2623,9 @@ remove_tests()
 	# addresses remove
 	if reset "remove addresses"; then
 		pm_nl_set_limits $ns1 3 3
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
-		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup
 		pm_nl_set_limits $ns2 3 3
 		addr_nr_ns1=-3 speed=10 \
 			run_tests $ns1 $ns2 10.0.1.1
@@ -2614,10 +2638,10 @@ remove_tests()
 	# invalid addresses remove
 	if reset "remove invalid addresses"; then
 		pm_nl_set_limits $ns1 3 3
-		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup
 		# broadcast IP: no packet for this address will be received on ns1
-		pm_nl_add_endpoint $ns1 224.0.0.1 flags signal
-		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
 		pm_nl_set_limits $ns2 2 2
 		addr_nr_ns1=-3 speed=10 \
 			run_tests $ns1 $ns2 10.0.1.1
@@ -2631,10 +2655,10 @@ remove_tests()
 	# subflows and signal, flush
 	if reset "flush subflows and signal"; then
 		pm_nl_set_limits $ns1 0 3
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
 		pm_nl_set_limits $ns2 1 3
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
 		addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 3 3 3
@@ -2647,9 +2671,9 @@ remove_tests()
 	if reset "flush subflows"; then
 		pm_nl_set_limits $ns1 3 3
 		pm_nl_set_limits $ns2 3 3
-		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
 		addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 3 3 3
@@ -2666,9 +2690,9 @@ remove_tests()
 	# addresses flush
 	if reset "flush addresses"; then
 		pm_nl_set_limits $ns1 3 3
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
-		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup
 		pm_nl_set_limits $ns2 3 3
 		addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
@@ -2681,9 +2705,9 @@ remove_tests()
 	# invalid addresses flush
 	if reset "flush invalid addresses"; then
 		pm_nl_set_limits $ns1 3 3
-		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
-		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup
 		pm_nl_set_limits $ns2 3 3
 		addr_nr_ns1=-8 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
@@ -2952,7 +2976,11 @@ mixed_tests()
 		pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
 		speed=slow \
 			run_tests $ns1 $ns2 dead:beef:2::1
-		chk_join_nr 1 1 1
+		if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_fullmesh_max$"; then
+			chk_join_nr 0 0 0
+		else
+			chk_join_nr 1 1 1
+		fi
 	fi
 
 	# fullmesh still tries to create all the possibly subflows with
@@ -3233,6 +3261,133 @@ add_addr_ports_tests()
 	fi
 }
 
+bind_tests()
+{
+	# bind to one address should not allow extra subflows to other addresses
+	if reset "bind main address v4, no join v4"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		bind_addr="10.0.1.1" \
+			run_tests $ns1 $ns2 10.0.1.1
+		join_syn_tx=1 \
+			chk_join_nr 0 0 0
+		chk_add_nr 1 1
+	fi
+
+	# bind to one address should not allow extra subflows to other addresses
+	if reset "bind main address v6, no join v6"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		bind_addr="dead:beef:1::1" \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		join_syn_tx=1 \
+			chk_join_nr 0 0 0
+		chk_add_nr 1 1
+	fi
+
+	# multiple binds to allow extra subflows to other addresses
+	if reset "multiple bind to allow joins v4"; then
+		local extra_bind
+
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+
+		# Launching another app listening on a different address
+		# Note: it could be a totally different app, e.g. nc, socat, ...
+		ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+			-s MPTCP 10.0.2.1 &
+		extra_bind=$!
+
+		bind_addr="10.0.1.1" \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+
+		kill ${extra_bind}
+	fi
+
+	# multiple binds to allow extra subflows to other addresses
+	if reset "multiple bind to allow joins v6"; then
+		local extra_bind
+
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+
+		# Launching another app listening on a different address
+		# Note: it could be a totally different app, e.g. nc, socat, ...
+		ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+			-s MPTCP dead:beef:2::1 &
+		extra_bind=$!
+
+		bind_addr="dead:beef:1::1" \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+
+		kill ${extra_bind}
+	fi
+
+	# multiple binds to allow extra subflows to other addresses: v6 LL case
+	if reset "multiple bind to allow joins v6 link-local routing"; then
+		local extra_bind ns1ll1 ns1ll2
+
+		ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
+		ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
+
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
+
+		wait_ll_ready $ns1 # to be able to bind
+		wait_ll_ready $ns2 # also needed to bind on the client side
+		ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+			-s MPTCP "${ns1ll2}%ns1eth2" &
+		extra_bind=$!
+
+		bind_addr="${ns1ll1}%ns1eth1" \
+			run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
+		# it is not possible to connect to the announced LL addr without
+		# specifying the outgoing interface.
+		join_connect_err=1 \
+			chk_join_nr 0 0 0
+		chk_add_nr 1 1
+
+		kill ${extra_bind}
+	fi
+
+	# multiple binds to allow extra subflows to v6 LL addresses: laminar
+	if reset "multiple bind to allow joins v6 link-local laminar" &&
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+		local extra_bind ns1ll1 ns1ll2 ns2ll2
+
+		ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
+		ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
+		ns2ll2="$(get_ll_addr $ns2 ns2eth2)"
+
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
+		pm_nl_add_endpoint $ns2 "${ns2ll2}" flags laminar dev ns2eth2
+
+		wait_ll_ready $ns1 # to be able to bind
+		wait_ll_ready $ns2 # also needed to bind on the client side
+		ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+			-s MPTCP "${ns1ll2}%ns1eth2" &
+		extra_bind=$!
+
+		bind_addr="${ns1ll1}%ns1eth1" \
+			run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+
+		kill ${extra_bind}
+	fi
+}
+
 syncookies_tests()
 {
 	# single subflow, syncookies
@@ -3500,7 +3655,6 @@ fullmesh_tests()
 fastclose_tests()
 {
 	if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
-		MPTCP_LIB_SUBTEST_FLAKY=1
 		test_linkfail=1024 fastclose=client \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 0 0 0
@@ -3509,7 +3663,6 @@ fastclose_tests()
 	fi
 
 	if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
-		MPTCP_LIB_SUBTEST_FLAKY=1
 		test_linkfail=1024 fastclose=server \
 			run_tests $ns1 $ns2 10.0.1.1
 		join_rst_nr=1 \
@@ -3806,7 +3959,7 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns1
 		pm_nl_set_limits $ns2 2 2
-		{ speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 		wait_mpj $ns1
@@ -3831,7 +3984,7 @@ userspace_tests()
 		chk_mptcp_info subflows 0 subflows 0
 		chk_subflows_total 1 1
 		kill_events_pids
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 	fi
 
 	# userspace pm create destroy subflow
@@ -3839,7 +3992,7 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns2
 		pm_nl_set_limits $ns1 0 1
-		{ speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 		wait_mpj $ns2
@@ -3859,7 +4012,7 @@ userspace_tests()
 		chk_mptcp_info subflows 0 subflows 0
 		chk_subflows_total 1 1
 		kill_events_pids
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 	fi
 
 	# userspace pm create id 0 subflow
@@ -3867,7 +4020,7 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns2
 		pm_nl_set_limits $ns1 0 1
-		{ speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 		wait_mpj $ns2
@@ -3880,7 +4033,7 @@ userspace_tests()
 		chk_mptcp_info subflows 1 subflows 1
 		chk_subflows_total 2 2
 		kill_events_pids
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 	fi
 
 	# userspace pm remove initial subflow
@@ -3888,7 +4041,7 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns2
 		pm_nl_set_limits $ns1 0 1
-		{ speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 		wait_mpj $ns2
@@ -3904,7 +4057,7 @@ userspace_tests()
 		chk_mptcp_info subflows 1 subflows 1
 		chk_subflows_total 1 1
 		kill_events_pids
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 	fi
 
 	# userspace pm send RM_ADDR for ID 0
@@ -3912,7 +4065,7 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns1
 		pm_nl_set_limits $ns2 1 1
-		{ speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 		wait_mpj $ns1
@@ -3930,7 +4083,7 @@ userspace_tests()
 		chk_mptcp_info subflows 1 subflows 1
 		chk_subflows_total 1 1
 		kill_events_pids
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 	fi
 }
 
@@ -3939,11 +4092,11 @@ endpoint_tests()
 	# subflow_rebuild_header is needed to support the implicit flag
 	# userspace pm type prevents add_addr
 	if reset "implicit EP" &&
-	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+	   continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
 		pm_nl_set_limits $ns1 2 2
 		pm_nl_set_limits $ns2 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-		{ speed=slow \
+		{ timeout_test=120 test_linkfail=128 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
@@ -3960,17 +4113,17 @@ endpoint_tests()
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
 		pm_nl_check_endpoint "modif is allowed" \
 			$ns2 10.0.2.2 id 1 flags signal
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 	fi
 
 	if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
-	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+	   continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
 		start_events
 		pm_nl_set_limits $ns1 0 3
 		pm_nl_set_limits $ns2 0 3
 		pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
-		{ test_linkfail=4 speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
@@ -4015,7 +4168,7 @@ endpoint_tests()
 			chk_mptcp_info subflows 3 subflows 3
 		done
 
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 
 		kill_events_pids
 		chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
@@ -4040,7 +4193,7 @@ endpoint_tests()
 
 	# remove and re-add
 	if reset_with_events "delete re-add signal" &&
-	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+	   continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
 		ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0
 		pm_nl_set_limits $ns1 0 3
 		pm_nl_set_limits $ns2 3 3
@@ -4048,7 +4201,7 @@ endpoint_tests()
 		# broadcast IP: no packet for this address will be received on ns1
 		pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
 		pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal
-		{ test_linkfail=4 speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
@@ -4057,39 +4210,46 @@ endpoint_tests()
 			$ns1 10.0.2.1 id 1 flags signal
 		chk_subflow_nr "before delete" 2
 		chk_mptcp_info subflows 1 subflows 1
+		chk_mptcp_info add_addr_signal 2 add_addr_accepted 1
 
 		pm_nl_del_endpoint $ns1 1 10.0.2.1
 		pm_nl_del_endpoint $ns1 2 224.0.0.1
 		sleep 0.5
 		chk_subflow_nr "after delete" 1
 		chk_mptcp_info subflows 0 subflows 0
+		chk_mptcp_info add_addr_signal 0 add_addr_accepted 0
 
 		pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
 		wait_mpj $ns2
 		chk_subflow_nr "after re-add" 3
 		chk_mptcp_info subflows 2 subflows 2
+		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
 
 		pm_nl_del_endpoint $ns1 42 10.0.1.1
 		sleep 0.5
 		chk_subflow_nr "after delete ID 0" 2
 		chk_mptcp_info subflows 2 subflows 2
+		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
 
 		pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
 		wait_mpj $ns2
 		chk_subflow_nr "after re-add ID 0" 3
 		chk_mptcp_info subflows 3 subflows 3
+		chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
 
 		pm_nl_del_endpoint $ns1 99 10.0.1.1
 		sleep 0.5
 		chk_subflow_nr "after re-delete ID 0" 2
 		chk_mptcp_info subflows 2 subflows 2
+		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
 
 		pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
 		wait_mpj $ns2
 		chk_subflow_nr "after re-re-add ID 0" 3
 		chk_mptcp_info subflows 3 subflows 3
-		mptcp_lib_kill_wait $tests_pid
+		chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
+		mptcp_lib_kill_group_wait $tests_pid
 
 		kill_events_pids
 		chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
@@ -4115,13 +4275,13 @@ endpoint_tests()
 
 	# flush and re-add
 	if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT &&
-	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+	   continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 1 2
 		# broadcast IP: no packet for this address will be received on ns1
 		pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
 		pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
-		{ test_linkfail=4 speed=20 \
+		{ timeout_test=120 test_linkfail=128 speed=20 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
@@ -4137,7 +4297,7 @@ endpoint_tests()
 		wait_mpj $ns2
 		pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
 		wait_mpj $ns2
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 
 		join_syn_tx=3 join_connect_err=1 \
 			chk_join_nr 2 2 2
@@ -4187,6 +4347,7 @@ all_tests_sorted=(
 	M@mixed_tests
 	b@backup_tests
 	p@add_addr_ports_tests
+	B@bind_tests
 	k@syncookies_tests
 	S@checksum_tests
 	d@deny_join_id0_tests
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index d62e653d48b0..5fea7e7df628 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -106,23 +106,32 @@ mptcp_lib_pr_info() {
 	mptcp_lib_print_info "INFO: ${*}"
 }
 
-# $1-2: listener/connector ns ; $3 port ; $4-5 listener/connector stat file
+mptcp_lib_pr_nstat() {
+	local ns="${1}"
+	local hist="/tmp/${ns}.out"
+
+	if [ -f "${hist}" ]; then
+		awk '$2 != 0 { print "  "$0 }' "${hist}"
+	else
+		ip netns exec "${ns}" nstat -as | grep Tcp
+	fi
+}
+
+# $1-2: listener/connector ns ; $3 port
 mptcp_lib_pr_err_stats() {
 	local lns="${1}"
 	local cns="${2}"
 	local port="${3}"
-	local lstat="${4}"
-	local cstat="${5}"
 
 	echo -en "${MPTCP_LIB_COLOR_RED}"
 	{
 		printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}"
 		ip netns exec "${lns}" ss -Menitam -o "sport = :${port}"
-		cat "${lstat}"
+		mptcp_lib_pr_nstat "${lns}"
 
 		printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}"
 		ip netns exec "${cns}" ss -Menitam -o "dport = :${port}"
-		[ "${lstat}" != "${cstat}" ] && cat "${cstat}"
+		[ "${lns}" != "${cns}" ] && mptcp_lib_pr_nstat "${cns}"
 	} 1>&2
 	echo -en "${MPTCP_LIB_COLOR_RESET}"
 }
@@ -341,6 +350,19 @@ mptcp_lib_evts_get_info() {
 		mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
 }
 
+mptcp_lib_wait_timeout() {
+	local timeout_test="${1}"
+	local listener_ns="${2}"
+	local connector_ns="${3}"
+	local port="${4}"
+	shift 4 # rest are PIDs
+
+	sleep "${timeout_test}"
+	mptcp_lib_print_err "timeout"
+	mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
+	kill "${@}" 2>/dev/null
+}
+
 # $1: PID
 mptcp_lib_kill_wait() {
 	[ "${1}" -eq 0 ] && return 0
@@ -350,19 +372,62 @@ mptcp_lib_kill_wait() {
 	wait "${1}" 2>/dev/null
 }
 
+# $1: PID ; $2: "ps o pid,ppid" output (optional, passed along when recursing)
+mptcp_lib_pid_list_children() {
+	local curr="${1}"
+	# invoke 'ps' only once
+	local pids="${2:-"$(ps o pid,ppid)"}"
+
+	echo "${curr}"
+
+	local pid
+	for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do
+		mptcp_lib_pid_list_children "${pid}" "${pids}"
+	done
+}
+
+# $1: PID
+mptcp_lib_kill_group_wait() {
+	# Some users might not have procps-ng: cannot use "kill -- -PID"
+	mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null
+	wait "${1}" 2>/dev/null
+}
+
 # $1: IP address
 mptcp_lib_is_v6() {
 	[ -z "${1##*:*}" ]
 }
 
+mptcp_lib_nstat_init() {
+	local ns="${1}"
+
+	rm -f "/tmp/${ns}."{nstat,out}
+	NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -n
+}
+
+mptcp_lib_nstat_get() {
+	local ns="${1}"
+
+	# filter out non-*TCP stats, and the rate (last column)
+	NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -sz |
+		grep -o ".*Tcp\S\+\s\+[0-9]\+" > "/tmp/${ns}.out"
+}
+
 # $1: ns, $2: MIB counter
+# Get the counter from the history (mptcp_lib_nstat_{init,get}()) if available.
+# If not, get the counter from nstat ignoring any history.
 mptcp_lib_get_counter() {
 	local ns="${1}"
 	local counter="${2}"
+	local hist="/tmp/${ns}.out"
 	local count
 
-	count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
-		awk 'NR==1 {next} {print $2}')
+	if [[ -s "${hist}" && "${counter}" == *"Tcp"* ]]; then
+		count=$(awk "/^${counter} / {print \$2; exit}" "${hist}")
+	else
+		count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
+			awk 'NR==1 {next} {print $2}')
+	fi
 	if [ -z "${count}" ]; then
 		mptcp_lib_fail_if_expected_feature "${counter} counter"
 		return 1
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index f01989be6e9b..ab8bce06b262 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -169,41 +169,44 @@ do_transfer()
 		cmsg+=",TCPINQ"
 	fi
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat -n
-	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-		nstat -n
-
-	timeout ${timeout_test} \
-		ip netns exec ${listener_ns} \
-			$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
-				${local_addr} < "$sin" > "$sout" &
+	mptcp_lib_nstat_init "${listener_ns}"
+	mptcp_lib_nstat_init "${connector_ns}"
+
+	ip netns exec ${listener_ns} \
+		$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
+			${local_addr} < "$sin" > "$sout" &
 	local spid=$!
 
-	sleep 1
+	mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
 
-	timeout ${timeout_test} \
-		ip netns exec ${connector_ns} \
-			$mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
-				$connect_addr < "$cin" > "$cout" &
+	ip netns exec ${connector_ns} \
+		$mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
+			$connect_addr < "$cin" > "$cout" &
 
 	local cpid=$!
 
+	mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+		"${connector_ns}" "${port}" "${cpid}" "${spid}" &
+	local timeout_pid=$!
+
 	wait $cpid
 	local retc=$?
 	wait $spid
 	local rets=$?
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat | grep Tcp > /tmp/${listener_ns}.out
-	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-		nstat | grep Tcp > /tmp/${connector_ns}.out
+	if kill -0 $timeout_pid; then
+		# Finished before the timeout: kill the background job
+		mptcp_lib_kill_group_wait $timeout_pid
+		timeout_pid=0
+	fi
+
+	mptcp_lib_nstat_get "${listener_ns}"
+	mptcp_lib_nstat_get "${connector_ns}"
 
 	print_title "Transfer ${ip:2}"
-	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
 		mptcp_lib_pr_fail "client exit code $retc, server $rets"
-		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
-			"/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
+		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
 
 		mptcp_lib_result_fail "transfer ${ip}"
 
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 1903e8e84a31..806aaa7d2d61 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -155,48 +155,53 @@ do_transfer()
 		sleep 1
 	fi
 
-	NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \
-		nstat -n
-	NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \
-		nstat -n
-
-	timeout ${timeout_test} \
-		ip netns exec ${ns3} \
-			./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
-				0.0.0.0 < "$sin" > "$sout" &
+	mptcp_lib_nstat_init "${ns3}"
+	mptcp_lib_nstat_init "${ns1}"
+
+	ip netns exec ${ns3} \
+		./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
+			0.0.0.0 < "$sin" > "$sout" &
 	local spid=$!
 
 	mptcp_lib_wait_local_port_listen "${ns3}" "${port}"
 
-	timeout ${timeout_test} \
-		ip netns exec ${ns1} \
-			./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
-				10.0.3.3 < "$cin" > "$cout" &
+	ip netns exec ${ns1} \
+		./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
+			10.0.3.3 < "$cin" > "$cout" &
 	local cpid=$!
 
+	mptcp_lib_wait_timeout "${timeout_test}" "${ns3}" "${ns1}" "${port}" \
+		"${cpid}" "${spid}" &
+	local timeout_pid=$!
+
 	wait $cpid
 	local retc=$?
 	wait $spid
 	local rets=$?
 
+	if kill -0 $timeout_pid; then
+		# Finished before the timeout: kill the background job
+		mptcp_lib_kill_group_wait $timeout_pid
+		timeout_pid=0
+	fi
+
 	if $capture; then
 		sleep 1
 		kill ${cappid_listener}
 		kill ${cappid_connector}
 	fi
 
-	NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \
-		nstat | grep Tcp > /tmp/${ns3}.out
-	NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \
-		nstat | grep Tcp > /tmp/${ns1}.out
+	mptcp_lib_nstat_get "${ns3}"
+	mptcp_lib_nstat_get "${ns1}"
 
 	cmp $sin $cout > /dev/null 2>&1
 	local cmps=$?
 	cmp $cin $sout > /dev/null 2>&1
 	local cmpc=$?
 
-	if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
-	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
+	if [ $retc -eq 0 ] && [ $rets -eq 0 ] &&
+	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] &&
+	   [ $timeout_pid -eq 0 ]; then
 		printf "%-16s" " max $max_time "
 		mptcp_lib_pr_ok
 		cat "$capout"
@@ -204,8 +209,7 @@ do_transfer()
 	fi
 
 	mptcp_lib_pr_fail "client exit code $retc, server $rets"
-	mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" \
-		"/tmp/${ns3}.out" "/tmp/${ns1}.out"
+	mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}"
 	ls -l $sin $cout
 	ls -l $cin $sout
 
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 87323942cb8a..e9ae1806ab07 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -211,7 +211,9 @@ make_connection()
 	ip netns exec "$ns1" \
 	   ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 &
 	local server_pid=$!
-	sleep 0.5
+
+	# Wait for the listener started above: it is bound to $app_port
+	mptcp_lib_wait_local_port_listen "${ns1}" "${app_port}"
 
 	# Run the client, transfer $file and stay connected to the server
 	# to conduct tests
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index 45832df98295..a68bc882fa4e 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -127,6 +127,8 @@ ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad
 ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
 ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad
 
+ip netns exec "$nsr1" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
 for i in 0 1; do
   ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
   ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
@@ -153,7 +155,9 @@ ip -net "$ns1" route add default via dead:1::1
 ip -net "$ns2" route add default via dead:2::1
 
 ip -net "$nsr1" route add default via 192.168.10.2
+ip -6 -net "$nsr1" route add default via fee1:2::2
 ip -net "$nsr2" route add default via 192.168.10.1
+ip -6 -net "$nsr2" route add default via fee1:2::1
 
 ip netns exec "$nsr1" nft -f - <<EOF
 table inet filter {
@@ -352,8 +356,9 @@ test_tcp_forwarding_ip()
 	local nsa=$1
 	local nsb=$2
 	local pmtu=$3
-	local dstip=$4
-	local dstport=$5
+	local proto=$4
+	local dstip=$5
+	local dstport=$6
 	local lret=0
 	local socatc
 	local socatl
@@ -363,12 +368,14 @@ test_tcp_forwarding_ip()
 		infile="$nsin_small"
 	fi
 
-	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
+	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -${proto} \
+            TCP"${proto}"-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
 	lpid=$!
 
 	busywait 1000 listener_ready
 
-	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
+	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -${proto} \
+            TCP"${proto}":"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
 	socatc=$?
 
 	wait $lpid
@@ -394,8 +401,11 @@ test_tcp_forwarding_ip()
 test_tcp_forwarding()
 {
 	local pmtu="$3"
+	local proto="$4"
+	local dstip="$5"
+	local dstport="$6"
 
-	test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
+	test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
 
 	return $?
 }
@@ -403,6 +413,9 @@ test_tcp_forwarding()
 test_tcp_forwarding_set_dscp()
 {
 	local pmtu="$3"
+	local proto="$4"
+	local dstip="$5"
+	local dstport="$6"
 
 ip netns exec "$nsr1" nft -f - <<EOF
 table netdev dscpmangle {
@@ -413,7 +426,7 @@ table netdev dscpmangle {
 }
 EOF
 if [ $? -eq 0 ]; then
-	test_tcp_forwarding_ip "$1" "$2" "$3" 10.0.2.99 12345
+	test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
 	check_dscp "dscp_ingress" "$pmtu"
 
 	ip netns exec "$nsr1" nft delete table netdev dscpmangle
@@ -430,7 +443,7 @@ table netdev dscpmangle {
 }
 EOF
 if [ $? -eq 0 ]; then
-	test_tcp_forwarding_ip "$1" "$2" "$pmtu"  10.0.2.99 12345
+	test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
 	check_dscp "dscp_egress" "$pmtu"
 
 	ip netns exec "$nsr1" nft delete table netdev dscpmangle
@@ -441,7 +454,7 @@ fi
 	# partial.  If flowtable really works, then both dscp-is-0 and dscp-is-cs3
 	# counters should have seen packets (before and after ft offload kicks in).
 	ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
-	test_tcp_forwarding_ip "$1" "$2" "$pmtu"  10.0.2.99 12345
+	test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
 	check_dscp "dscp_fwd" "$pmtu"
 }
 
@@ -455,7 +468,7 @@ test_tcp_forwarding_nat()
 
 	[ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"
 
-	test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.0.2.99 12345
+	test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.0.2.99 12345
 	lret=$?
 
 	if [ "$lret" -eq 0 ] ; then
@@ -465,7 +478,7 @@ test_tcp_forwarding_nat()
 			echo "PASS: flow offload for ns1/ns2 with masquerade $what"
 		fi
 
-		test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.6.6.6 1666
+		test_tcp_forwarding_ip "$1" "$2" "$pmtu" 4 10.6.6.6 1666
 		lret=$?
 		if [ "$pmtu" -eq 1 ] ;then
 			check_counters "flow offload for ns1/ns2 with dnat $what"
@@ -487,7 +500,7 @@ make_file "$nsin_small" "$filesize_small"
 # Due to MTU mismatch in both directions, all packets (except small packets like pure
 # acks) have to be handled by normal forwarding path.  Therefore, packet counters
 # are not checked.
-if test_tcp_forwarding "$ns1" "$ns2" 0; then
+if test_tcp_forwarding "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
 	echo "PASS: flow offloaded for ns1/ns2"
 else
 	echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -495,6 +508,14 @@ else
 	ret=1
 fi
 
+if test_tcp_forwarding "$ns1" "$ns2" 0 6 "[dead:2::99]" 12345; then
+	echo "PASS: IPv6 flow offloaded for ns1/ns2"
+else
+	echo "FAIL: IPv6 flow offload for ns1/ns2:" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
 # delete default route, i.e. ns2 won't be able to reach ns1 and
 # will depend on ns1 being masqueraded in nsr1.
 # expect ns1 has nsr1 address.
@@ -520,7 +541,7 @@ table ip nat {
 EOF
 
 check_dscp "dscp_none" "0"
-if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
 	echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
 	exit 0
 fi
@@ -546,7 +567,7 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
 ip netns exec "$ns2"  nft reset counters table inet filter >/dev/null
 
-if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
 	echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
 	exit 0
 fi
@@ -558,6 +579,73 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
 	ip netns exec "$nsr1" nft list ruleset
 fi
 
+# IPIP tunnel test:
+# Add IPIP tunnel interfaces and check flowtable acceleration.
+test_ipip() {
+if ! ip -net "$nsr1" link add name tun0 type ipip \
+     local 192.168.10.1 remote 192.168.10.2 >/dev/null;then
+	echo "SKIP: could not add ipip tunnel"
+	[ "$ret" -eq 0 ] && ret=$ksft_skip
+	return
+fi
+ip -net "$nsr1" link set tun0 up
+ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
+ip -net "$nsr2" link set tun0 up
+ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+
+ip -net "$nsr1" route change default via 192.168.100.2
+ip -net "$nsr2" route change default via 192.168.100.1
+ip -net "$ns2" route add default via 10.0.2.1
+
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward \
+	'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
+	echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
+# Create vlan tagged devices for IPIP traffic.
+ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
+ip -net "$nsr1" link set veth1.10 up
+ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
+ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun1 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
+ip -net "$nsr1" route change default via 192.168.200.2
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
+
+ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
+ip -net "$nsr2" link set veth0.10 up
+ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
+ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun1 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
+ip -net "$nsr2" route change default via 192.168.200.1
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
+	echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
+# Restore the previous configuration
+ip -net "$nsr1" route change default via 192.168.10.2
+ip -net "$nsr2" route change default via 192.168.10.1
+ip -net "$ns2" route del default via 10.0.2.1
+}
+
 # Another test:
 # Add bridge interface br0 to Router1, with NAT enabled.
 test_bridge() {
@@ -643,6 +731,8 @@ ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
 ip -net "$nsr1" link set up dev veth0
 }
 
+test_ipip
+
 test_bridge
 
 KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
@@ -683,7 +773,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
 ip -net "$ns2" route add default via 10.0.2.1
 ip -net "$ns2" route add default via dead:2::1
 
-if test_tcp_forwarding "$ns1" "$ns2" 1; then
+if test_tcp_forwarding "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
 	check_counters "ipsec tunnel mode for ns1/ns2"
 else
 	echo "FAIL: ipsec tunnel mode for ns1/ns2"
@@ -691,6 +781,14 @@ else
 	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
 fi
 
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+	check_counters "IPv6 ipsec tunnel mode for ns1/ns2"
+else
+	echo "FAIL: IPv6 ipsec tunnel mode for ns1/ns2"
+	ip netns exec "$nsr1" nft list ruleset 1>&2
+	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
+fi
+
 if [ "$1" = "" ]; then
 	low=1280
 	mtu=$((65536 - low))
diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c
index 21bb1cfd8a85..b282d1785c9b 100644
--- a/tools/testing/selftests/net/netfilter/sctp_collision.c
+++ b/tools/testing/selftests/net/netfilter/sctp_collision.c
@@ -9,9 +9,10 @@
 int main(int argc, char *argv[])
 {
 	struct sockaddr_in saddr = {}, daddr = {};
-	int sd, ret, len = sizeof(daddr);
+	socklen_t len = sizeof(daddr);
 	struct timeval tv = {25, 0};
 	char buf[] = "hello";
+	int sd, ret;
 
 	if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
 		printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c
index 7618ebe528a4..679b6c77ace7 100644
--- a/tools/testing/selftests/net/netlink-dumps.c
+++ b/tools/testing/selftests/net/netlink-dumps.c
@@ -143,6 +143,7 @@ TEST(dump_extack)
 	EXPECT_EQ(n, -1);
 	EXPECT_EQ(errno, ENOBUFS);
 
+	ret = NO_CTRL;
 	for (i = 0; i < cnt; i++) {
 		struct ext_ack ea = {};
 
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt
new file mode 100644
index 000000000000..47550df124ce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Test SYN+ACK retransmissions with the RTO capped to 1s (tcp_rto_max_ms=1000).
+//
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_rto_max_ms=1000`
+
+//
+// Test 1: TFO SYN+ACK
+//
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 1000 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// RTO must be capped to 1s
+   +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+   +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+   +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+   +0 < . 11:11(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK>
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+   +0 close(4) = 0
+   +0 close(3) = 0
+
+
+//
+// Test 2: non-TFO SYN+ACK
+//
+   +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 1000 <mss 1460,sackOK,nop,nop>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+// RTO must be capped to 1s
+   +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+   +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+   +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+   +0 < . 1:1(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK>
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+
+   +0 close(4) = 0
+   +0 close(3) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
index b2b2cdf27e20..454441e7ecff 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
@@ -1,6 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 // Test that we correctly skip zero-length IOVs.
+
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
+
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
    +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
    +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
index 183051ba0cae..6882b8240a8a 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
@@ -23,14 +23,16 @@
 
 // install a qdisc dropping all packets
    +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0`
+
    +0 write(4, ..., 24) = 24
    // When qdisc is congested we retry every 500ms
    // (TCP_RESOURCE_PROBE_INTERVAL) and therefore
    // we retry 6 times before hitting 3s timeout.
    // First verify that the connection is alive:
-+3.250 write(4, ..., 24) = 24
++3 write(4, ..., 24) = 24
+
    // Now verify that shortly after that the socket is dead:
- +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out)
++1 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out)
 
    +0 %{ assert tcpi_probes == 6, tcpi_probes; \
          assert tcpi_backoff == 0, tcpi_backoff }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
index a82c8899d36b..0a0700afdaa3 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
@@ -4,6 +4,8 @@
 // send a packet with MSG_ZEROCOPY and receive the notification ID
 // repeat and verify IDs are consecutive
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
index c01915e7f4a1..df91675d2991 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
@@ -3,6 +3,8 @@
 //
 // send multiple packets, then read one range of all notifications.
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
index 6509882932e9..2963cfcb14df 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 // Minimal client-side zerocopy test
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
index 2cd78755cb2a..ea0c2fa73c2d 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
@@ -7,6 +7,8 @@
 // First send on a closed socket and wait for (absent) notification.
 // Then connect and send and verify that notification nr. is zero.
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
index 7671c20e01cf..4df978a9b82e 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
@@ -7,6 +7,9 @@
 // fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
 // is correctly fired only once, when EPOLLET is set. send another packet with
 // MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
index fadc480fdb7f..36b6edc4858c 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
@@ -8,6 +8,9 @@
 // fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
 // is correctly fired only once, when EPOLLET is set. send another packet with
 // MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
index 5bfa0d1d2f4a..1bea6f3b4558 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
@@ -8,6 +8,9 @@
 // is correctly fired only once, when EPOLLONESHOT is set. send another packet
 // with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and
 // confirm that EPOLLERR is correctly set.
+
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
index 4a73bbf46961..e27c21ff5d18 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
@@ -8,6 +8,8 @@
 // one will have no data in the initial send. On return 0 the
 // zerocopy notification counter is not incremented. Verify this too.
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
 // Send a FastOpen request, no cookie yet so no data in SYN
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
index 36086c5877ce..b1fa77c77dfa 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
@@ -4,6 +4,8 @@
 // send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
 // kernel returns the notification ID.
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh
  ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207`
 
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
index 672f817faca0..2f5317d0a9fa 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
@@ -7,6 +7,8 @@
 //    because each iovec element becomes a frag
 // 3) the PSH bit is set on an skb when it runs out of fragments
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
index a9a1ac0aea4f..9d5272c6b207 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
@@ -4,6 +4,8 @@
 // verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy
 // packets of all sizes, including the smallest payload, 1B.
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index dbf77513f617..248c2b91fe42 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -8,6 +8,7 @@ ALL_TESTS="
 	kci_test_polrouting
 	kci_test_route_get
 	kci_test_addrlft
+	kci_test_addrlft_route_cleanup
 	kci_test_promote_secondaries
 	kci_test_tc
 	kci_test_gre
@@ -323,6 +324,25 @@ kci_test_addrlft()
 	end_test "PASS: preferred_lft addresses have expired"
 }
 
+kci_test_addrlft_route_cleanup()
+{
+	local ret=0
+	local test_addr="2001:db8:99::1/64"
+	local test_prefix="2001:db8:99::/64"
+
+	run_cmd ip -6 addr add $test_addr dev "$devdummy" valid_lft 300 preferred_lft 300
+	run_cmd_grep "$test_prefix proto kernel" ip -6 route show dev "$devdummy"
+	run_cmd ip -6 addr del $test_addr dev "$devdummy"
+	run_cmd_grep_fail "$test_prefix" ip -6 route show dev "$devdummy"
+
+	if [ $ret -ne 0 ]; then
+		end_test "FAIL: route not cleaned up when address with valid_lft deleted"
+		return 1
+	fi
+
+	end_test "PASS: route cleaned up when address with valid_lft deleted"
+}
+
 kci_test_promote_secondaries()
 {
 	run_cmd ifconfig "$devdummy"
@@ -1466,6 +1486,8 @@ usage: ${0##*/} OPTS
 EOF
 }
 
+require_command jq
+
 #check for needed privileges
 if [ "$(id -u)" -ne 0 ];then
 	end_test "SKIP: Need root privileges"
diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c
index f02f1f95d227..a04dac0b8027 100644
--- a/tools/testing/selftests/net/sctp_hello.c
+++ b/tools/testing/selftests/net/sctp_hello.c
@@ -29,7 +29,6 @@ static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len
 static int do_client(int argc, char *argv[])
 {
 	struct sockaddr_storage ss;
-	char buf[] = "hello";
 	int csk, ret, len;
 
 	if (argc < 5) {
@@ -56,16 +55,10 @@ static int do_client(int argc, char *argv[])
 
 	set_addr(&ss, argv[3], argv[4], &len);
 	ret = connect(csk, (struct sockaddr *)&ss, len);
-	if (ret < 0) {
-		printf("failed to connect to peer\n");
+	if (ret < 0)
 		return -1;
-	}
 
-	ret = send(csk, buf, strlen(buf) + 1, 0);
-	if (ret < 0) {
-		printf("failed to send msg %d\n", ret);
-		return -1;
-	}
+	recv(csk, NULL, 0, 0);
 	close(csk);
 
 	return 0;
@@ -75,7 +68,6 @@ int main(int argc, char *argv[])
 {
 	struct sockaddr_storage ss;
 	int lsk, csk, ret, len;
-	char buf[20];
 
 	if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
 		printf("%s server|client ...\n", argv[0]);
@@ -125,11 +117,6 @@ int main(int argc, char *argv[])
 		return -1;
 	}
 
-	ret = recv(csk, buf, sizeof(buf), 0);
-	if (ret <= 0) {
-		printf("failed to recv msg %d\n", ret);
-		return -1;
-	}
 	close(csk);
 	close(lsk);
 
diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh
index c854034b6aa1..667b211aa8a1 100755
--- a/tools/testing/selftests/net/sctp_vrf.sh
+++ b/tools/testing/selftests/net/sctp_vrf.sh
@@ -20,9 +20,9 @@ setup() {
 	modprobe sctp_diag
 	setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS
 
-	ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
-	ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
-	ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
+	ip net exec $CLIENT_NS1 sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip net exec $CLIENT_NS2 sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip net exec $SERVER_NS sysctl -wq net.ipv6.conf.default.accept_dad=0
 
 	ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1
 	ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2
@@ -62,17 +62,40 @@ setup() {
 }
 
 cleanup() {
-	ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
+	wait_client $CLIENT_NS1
+	wait_client $CLIENT_NS2
+	stop_server
 	cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS
 }
 
-wait_server() {
+start_server() {
 	local IFACE=$1
 	local CNT=0
 
-	until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \
-		grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do
-		[ $((CNT++)) = "20" ] && { RET=3; return $RET; }
+	ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP $SERVER_PORT $IFACE &
+	disown
+	until ip netns exec $SERVER_NS ss -SlH | grep -q "$IFACE"; do
+		[ $((CNT++)) -eq 30 ] && { RET=3; return $RET; }
+		sleep 0.1
+	done
+}
+
+stop_server() {
+	local CNT=0
+
+	ip netns exec $SERVER_NS pkill sctp_hello
+	while ip netns exec $SERVER_NS ss -SaH | grep -q .; do
+		[ $((CNT++)) -eq 30 ] && break
+		sleep 0.1
+	done
+}
+
+wait_client() {
+	local CLIENT_NS=$1
+	local CNT=0
+
+	while ip netns exec $CLIENT_NS ss -SaH | grep -q .; do
+		[ $((CNT++)) -eq 30 ] && break
 		sleep 0.1
 	done
 }
@@ -81,14 +104,12 @@ do_test() {
 	local CLIENT_NS=$1
 	local IFACE=$2
 
-	ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
-	ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
-		$SERVER_PORT $IFACE 2>&1 >/dev/null &
-	disown
-	wait_server $IFACE || return $RET
+	start_server $IFACE || return $RET
 	timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \
-		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT
 	RET=$?
+	wait_client $CLIENT_NS
+	stop_server
 	return $RET
 }
 
@@ -96,25 +117,21 @@ do_testx() {
 	local IFACE1=$1
 	local IFACE2=$2
 
-	ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
-	ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
-		$SERVER_PORT $IFACE1 2>&1 >/dev/null &
-	disown
-	wait_server $IFACE1 || return $RET
-	ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
-		$SERVER_PORT $IFACE2 2>&1 >/dev/null &
-	disown
-	wait_server $IFACE2 || return $RET
+	start_server $IFACE1 || return $RET
+	start_server $IFACE2 || return $RET
 	timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \
-		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \
+		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT && \
 	timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \
-		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT
 	RET=$?
+	wait_client $CLIENT_NS1
+	wait_client $CLIENT_NS2
+	stop_server
 	return $RET
 }
 
 testup() {
-	ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null
+	ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=1
 	echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y "
 	do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; }
 	echo "[PASS]"
@@ -123,7 +140,7 @@ testup() {
 	do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; }
 	echo "[PASS]"
 
-	ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null
+	ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=0
 	echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N "
 	do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; }
 	echo "[PASS]"
@@ -160,7 +177,7 @@ testup() {
 	do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; }
 	echo "[PASS]"
 
-	echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N "
+	echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, Y "
 	do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; }
 	echo "[PASS]"
 }
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
deleted file mode 100644
index 2070b57849de..000000000000
--- a/tools/testing/selftests/net/setup_loopback.sh
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
-readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
-readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
-readonly HARD_IRQS="$(< ${IRQ_PATH})"
-readonly server_ns=$(mktemp -u server-XXXXXXXX)
-readonly client_ns=$(mktemp -u client-XXXXXXXX)
-
-netdev_check_for_carrier() {
-	local -r dev="$1"
-
-	for i in {1..5}; do
-		carrier="$(cat /sys/class/net/${dev}/carrier)"
-		if [[ "${carrier}" -ne 1 ]] ; then
-			echo "carrier not ready yet..." >&2
-			sleep 1
-		else
-			echo "carrier ready" >&2
-			break
-		fi
-	done
-	echo "${carrier}"
-}
-
-# Assumes that there is no existing ipvlan device on the physical device
-setup_loopback_environment() {
-	local dev="$1"
-
-	# Fail hard if cannot turn on loopback mode for current NIC
-	ethtool -K "${dev}" loopback on || exit 1
-	sleep 1
-
-	# Check for the carrier
-	carrier=$(netdev_check_for_carrier ${dev})
-	if [[ "${carrier}" -ne 1 ]] ; then
-		echo "setup_loopback_environment failed"
-		exit 1
-	fi
-}
-
-setup_macvlan_ns(){
-	local -r link_dev="$1"
-	local -r ns_name="$2"
-	local -r ns_dev="$3"
-	local -r ns_mac="$4"
-	local -r addr="$5"
-
-	ip link add link "${link_dev}" dev "${ns_dev}" \
-		address "${ns_mac}" type macvlan
-	exit_code=$?
-	if [[ "${exit_code}" -ne 0 ]]; then
-		echo "setup_macvlan_ns failed"
-		exit $exit_code
-	fi
-
-	[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
-	ip link set dev "${ns_dev}" netns "${ns_name}"
-	ip -netns "${ns_name}" link set dev "${ns_dev}" up
-	if [[ -n "${addr}" ]]; then
-		ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
-	fi
-
-	sleep 1
-}
-
-cleanup_macvlan_ns(){
-	while (( $# >= 2 )); do
-		ns_name="$1"
-		ns_dev="$2"
-		ip -netns "${ns_name}" link del dev "${ns_dev}"
-		ip netns del "${ns_name}"
-		shift 2
-	done
-}
-
-cleanup_loopback(){
-	local -r dev="$1"
-
-	ethtool -K "${dev}" loopback off
-	sleep 1
-
-	# Check for the carrier
-	carrier=$(netdev_check_for_carrier ${dev})
-	if [[ "${carrier}" -ne 1 ]] ; then
-		echo "setup_loopback_environment failed"
-		exit 1
-	fi
-}
-
-setup_interrupt() {
-	# Use timer on  host to trigger the network stack
-	# Also disable device interrupt to not depend on NIC interrupt
-	# Reduce test flakiness caused by unexpected interrupts
-	echo 100000 >"${FLUSH_PATH}"
-	echo 50 >"${IRQ_PATH}"
-}
-
-setup_ns() {
-	# Set up server_ns namespace and client_ns namespace
-	setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
-	setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
-}
-
-cleanup_ns() {
-	cleanup_macvlan_ns ${server_ns} server ${client_ns} client
-}
-
-setup() {
-	setup_loopback_environment "${dev}"
-	setup_interrupt
-}
-
-cleanup() {
-	cleanup_loopback "${dev}"
-
-	echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
-	echo "${HARD_IRQS}" >"${IRQ_PATH}"
-}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
deleted file mode 100644
index 152bf4c65747..000000000000
--- a/tools/testing/selftests/net/setup_veth.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly server_ns=$(mktemp -u server-XXXXXXXX)
-readonly client_ns=$(mktemp -u client-XXXXXXXX)
-
-setup_veth_ns() {
-	local -r link_dev="$1"
-	local -r ns_name="$2"
-	local -r ns_dev="$3"
-	local -r ns_mac="$4"
-
-	[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
-	echo 200000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
-	echo 1 > "/sys/class/net/${ns_dev}/napi_defer_hard_irqs"
-	ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
-	ip -netns "${ns_name}" link set dev "${ns_dev}" up
-
-	ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
-}
-
-setup_ns() {
-	# Set up server_ns namespace and client_ns namespace
-	ip link add name server type veth peer name client
-
-	setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
-	setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
-}
-
-cleanup_ns() {
-	local ns_name
-
-	for ns_name in ${client_ns} ${server_ns}; do
-		[[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
-	done
-}
-
-setup() {
-	# no global init setup step needed
-	:
-}
-
-cleanup() {
-	cleanup_ns
-}
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 8457b7ccbc09..b76df1efc2ef 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -174,7 +174,7 @@ static int do_recv_errqueue_timeout(int fdt)
 	msg.msg_controllen = sizeof(control);
 
 	while (1) {
-		const char *reason;
+		const char *reason = NULL;
 
 		ret = recvmsg(fdt, &msg, MSG_ERRQUEUE);
 		if (ret == -1 && errno == EAGAIN)
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index e788b84551ca..da1b50b30719 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -564,6 +564,40 @@ TEST_F(tls, msg_more)
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
 
+TEST_F(tls, cmsg_msg_more)
+{
+	char *test_str =  "test_read";
+	char record_type = 100;
+	int send_len = 10;
+
+	/* we don't allow MSG_MORE with non-DATA records */
+	EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len,
+				MSG_MORE), -1);
+	EXPECT_EQ(errno, EINVAL);
+}
+
+TEST_F(tls, msg_more_then_cmsg)
+{
+	char *test_str = "test_read";
+	char record_type = 100;
+	int send_len = 10;
+	char buf[10 * 2];
+	int ret;
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
+
+	ret = tls_send_cmsg(self->fd, record_type, test_str, send_len, 0);
+	EXPECT_EQ(ret, send_len);
+
+	/* initial DATA record didn't get merged with the non-DATA record */
+	EXPECT_EQ(recv(self->cfd, buf, send_len * 2, 0), send_len);
+
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+				buf, sizeof(buf), MSG_WAITALL),
+		  send_len);
+}
+
 TEST_F(tls, msg_more_unsent)
 {
 	char const *test_str = "test_read";
@@ -912,6 +946,37 @@ TEST_F(tls, peek_and_splice)
 	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
 }
 
+#define MAX_FRAGS 48
+TEST_F(tls, splice_short)
+{
+	struct iovec sendchar_iov;
+	char read_buf[0x10000];
+	char sendbuf[0x100];
+	char sendchar = 'S';
+	int pipefds[2];
+	int i;
+
+	sendchar_iov.iov_base = &sendchar;
+	sendchar_iov.iov_len = 1;
+
+	memset(sendbuf, 's', sizeof(sendbuf));
+
+	ASSERT_GE(pipe2(pipefds, O_NONBLOCK), 0);
+	ASSERT_GE(fcntl(pipefds[0], F_SETPIPE_SZ, (MAX_FRAGS + 1) * 0x1000), 0);
+
+	for (i = 0; i < MAX_FRAGS; i++)
+		ASSERT_GE(vmsplice(pipefds[1], &sendchar_iov, 1, 0), 0);
+
+	ASSERT_EQ(write(pipefds[1], sendbuf, sizeof(sendbuf)), sizeof(sendbuf));
+
+	EXPECT_EQ(splice(pipefds[0], NULL, self->fd, NULL, MAX_FRAGS + 0x1000, 0),
+		  MAX_FRAGS + sizeof(sendbuf));
+	EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), 0), MAX_FRAGS + sizeof(sendbuf));
+	EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), MSG_DONTWAIT), -1);
+	EXPECT_EQ(errno, EAGAIN);
+}
+#undef MAX_FRAGS
+
 TEST_F(tls, recvmsg_single)
 {
 	char const *test_str = "test_recvmsg_single";
@@ -2791,6 +2856,147 @@ TEST_F(tls_err, oob_pressure)
 		EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
 }
 
+/*
+ * Parse a stream of TLS records and ensure that each record respects
+ * the specified @max_payload_len.
+ */
+static size_t parse_tls_records(struct __test_metadata *_metadata,
+				const __u8 *rx_buf, int rx_len, int overhead,
+				__u16 max_payload_len)
+{
+	const __u8 *rec = rx_buf;
+	size_t total_plaintext_rx = 0;
+	const __u8 rec_header_len = 5;
+
+	while (rec < rx_buf + rx_len) {
+		__u16 record_payload_len;
+		__u16 plaintext_len;
+
+		/* Sanity check that it's a TLS header for application data */
+		ASSERT_EQ(rec[0], 23);
+		ASSERT_EQ(rec[1], 0x3);
+		ASSERT_EQ(rec[2], 0x3);
+
+		memcpy(&record_payload_len, rec + 3, 2);
+		record_payload_len = ntohs(record_payload_len);
+		ASSERT_GE(record_payload_len, overhead);
+
+		plaintext_len = record_payload_len - overhead;
+		total_plaintext_rx += plaintext_len;
+
+		/* Plaintext must not exceed the specified limit */
+		ASSERT_LE(plaintext_len, max_payload_len);
+		rec += rec_header_len + record_payload_len;
+	}
+
+	return total_plaintext_rx;
+}
+
+/* Check that no TX record carries more plaintext than the configured limit */
+TEST(tls_12_tx_max_payload_len)
+{
+	struct tls_crypto_info_keys tls12;
+	int cfd, ret, fd, overhead;
+	size_t total_plaintext_rx = 0;
+	__u8 tx[1024], rx[2000];
+	__u16 limit = 128;
+	__u16 opt = 0;
+	unsigned int optlen = sizeof(opt);
+	bool notls;
+
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, &tls12, 0);
+
+	ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+	if (notls)
+		exit(KSFT_SKIP);
+
+	/* Don't install keys on fd, we'll parse raw records */
+	ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit,
+			 sizeof(limit));
+	ASSERT_EQ(ret, 0);
+
+	ret = getsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &opt, &optlen);
+	EXPECT_EQ(ret, 0);
+	EXPECT_EQ(limit, opt);
+	EXPECT_EQ(optlen, sizeof(limit));
+
+	memset(tx, 0, sizeof(tx));
+	ASSERT_EQ(send(cfd, tx, sizeof(tx), 0), sizeof(tx));
+	close(cfd);
+
+	/* cfd is closed: MSG_WAITALL collects the whole stream up to EOF */
+	ret = recv(fd, rx, sizeof(rx), MSG_WAITALL);
+	ASSERT_GT(ret, 0);
+	/*
+	 * 16B tag + 8B IV -- record header (5B) is not counted but we'll
+	 * need it to walk the record stream
+	 */
+	overhead = 16 + 8;
+	total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead,
+					       limit);
+
+	ASSERT_EQ(total_plaintext_rx, sizeof(tx));
+	close(fd);
+}
+
+/* Check that the payload limit cannot change while a record is still open */
+TEST(tls_12_tx_max_payload_len_open_rec)
+{
+	struct tls_crypto_info_keys tls12;
+	int cfd, ret, fd, overhead;
+	size_t total_plaintext_rx = 0;
+	__u8 tx[1024], rx[2000];
+	__u16 tx_partial = 256;
+	__u16 og_limit = 512, limit = 128;
+	bool notls;
+
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, &tls12, 0);
+
+	ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+	if (notls)
+		exit(KSFT_SKIP);
+
+	/* Don't install keys on fd, we'll parse raw records */
+	ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &og_limit,
+			 sizeof(og_limit));
+	ASSERT_EQ(ret, 0);
+
+	memset(tx, 0, sizeof(tx));
+	ASSERT_EQ(send(cfd, tx, tx_partial, MSG_MORE), tx_partial);
+
+	/*
+	 * Changing the payload limit with a pending open record should
+	 * not be allowed.
+	 */
+	ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit,
+			 sizeof(limit));
+	ASSERT_EQ(ret, -1);
+	ASSERT_EQ(errno, EBUSY);
+
+	ASSERT_EQ(send(cfd, tx + tx_partial, sizeof(tx) - tx_partial, MSG_EOR),
+		  sizeof(tx) - tx_partial);
+	close(cfd);
+
+	/* cfd is closed: MSG_WAITALL collects the whole stream up to EOF */
+	ret = recv(fd, rx, sizeof(rx), MSG_WAITALL);
+	ASSERT_GT(ret, 0);
+	/*
+	 * 16B tag + 8B IV -- record header (5B) is not counted but we'll
+	 * need it to walk the record stream
+	 */
+	overhead = 16 + 8;
+	total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead,
+					       og_limit);
+	ASSERT_EQ(total_plaintext_rx, sizeof(tx));
+	close(fd);
+}
+
 TEST(non_established) {
 	struct tls12_crypto_info_aes_gcm_256 tls12;
 	struct sockaddr_in addr;
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
deleted file mode 100755
index 8ff172f7bb1b..000000000000
--- a/tools/testing/selftests/net/toeplitz.sh
+++ /dev/null
@@ -1,199 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
-# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
-# ('-rps <rps_map>')
-#
-# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
-# which is a driver-specific encoding.
-#
-# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
-# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
-
-source setup_loopback.sh
-readonly SERVER_IP4="192.168.1.200/24"
-readonly SERVER_IP6="fda8::1/64"
-readonly SERVER_MAC="aa:00:00:00:00:02"
-
-readonly CLIENT_IP4="192.168.1.100/24"
-readonly CLIENT_IP6="fda8::2/64"
-readonly CLIENT_MAC="aa:00:00:00:00:01"
-
-PORT=8000
-KEY="$(</proc/sys/net/core/netdev_rss_key)"
-TEST_RSS=false
-RPS_MAP=""
-PROTO_FLAG=""
-IP_FLAG=""
-DEV="eth0"
-
-# Return the number of rxqs among which RSS is configured to spread packets.
-# This is determined by reading the RSS indirection table using ethtool.
-get_rss_cfg_num_rxqs() {
-	echo $(ethtool -x "${DEV}" |
-		grep -E [[:space:]]+[0-9]+:[[:space:]]+ |
-		cut -d: -f2- |
-		awk '{$1=$1};1' |
-		tr ' ' '\n' |
-		sort -u |
-		wc -l)
-}
-
-# Return a list of the receive irq handler cpus.
-# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
-# Reads /sys/kernel/irq/ in order, so algorithm depends on
-# irq_{rxq-0} < irq_{rxq-1}, etc.
-get_rx_irq_cpus() {
-	CPUS=""
-	# sort so that irq 2 is read before irq 10
-	SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
-	# Consider only as many queues as RSS actually uses. We assume that
-	# if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
-	RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
-	RXQ_COUNT=0
-
-	for i in ${SORTED_IRQS}
-	do
-		[[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
-		# lookup relevant IRQs by action name
-		[[ -e "$i/actions" ]] || continue
-		cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
-		irqname=$(<"$i/actions")
-
-		# does the IRQ get called
-		irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
-		[[ -n "${irqcount}" ]] || continue
-
-		# lookup CPU
-		irq=$(basename "$i")
-		cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
-
-		if [[ -z "${CPUS}" ]]; then
-			CPUS="${cpu}"
-		else
-			CPUS="${CPUS},${cpu}"
-		fi
-		RXQ_COUNT=$((RXQ_COUNT+1))
-	done
-
-	echo "${CPUS}"
-}
-
-get_disable_rfs_cmd() {
-	echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
-}
-
-get_set_rps_bitmaps_cmd() {
-	CMD=""
-	for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
-	do
-		CMD="${CMD} echo $1 > ${i};"
-	done
-
-	echo "${CMD}"
-}
-
-get_disable_rps_cmd() {
-	echo "$(get_set_rps_bitmaps_cmd 0)"
-}
-
-die() {
-	echo "$1"
-	exit 1
-}
-
-check_nic_rxhash_enabled() {
-	local -r pattern="receive-hashing:\ on"
-
-	ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
-}
-
-parse_opts() {
-	local prog=$0
-	shift 1
-
-	while [[ "$1" =~ "-" ]]; do
-		if [[ "$1" = "-irq_prefix" ]]; then
-			shift
-			IRQ_PATTERN="^$1-[0-9]*$"
-		elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
-			PROTO_FLAG="$1"
-		elif [[ "$1" = "-4" ]]; then
-			IP_FLAG="$1"
-			SERVER_IP="${SERVER_IP4}"
-			CLIENT_IP="${CLIENT_IP4}"
-		elif [[ "$1" = "-6" ]]; then
-			IP_FLAG="$1"
-			SERVER_IP="${SERVER_IP6}"
-			CLIENT_IP="${CLIENT_IP6}"
-		elif [[ "$1" = "-rss" ]]; then
-			TEST_RSS=true
-		elif [[ "$1" = "-rps" ]]; then
-			shift
-			RPS_MAP="$1"
-		elif [[ "$1" = "-i" ]]; then
-			shift
-			DEV="$1"
-		else
-			die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
-			     [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
-		fi
-		shift
-	done
-}
-
-setup() {
-	setup_loopback_environment "${DEV}"
-
-	# Set up server_ns namespace and client_ns namespace
-	setup_macvlan_ns "${DEV}" $server_ns server \
-	"${SERVER_MAC}" "${SERVER_IP}"
-	setup_macvlan_ns "${DEV}" $client_ns client \
-	"${CLIENT_MAC}" "${CLIENT_IP}"
-}
-
-cleanup() {
-	cleanup_macvlan_ns $server_ns server $client_ns client
-	cleanup_loopback "${DEV}"
-}
-
-parse_opts $0 $@
-
-setup
-trap cleanup EXIT
-
-check_nic_rxhash_enabled
-
-# Actual test starts here
-if [[ "${TEST_RSS}" = true ]]; then
-	# RPS/RFS must be disabled because they move packets between cpus,
-	# which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
-	eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
-	  ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
-	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
-	  -C "$(get_rx_irq_cpus)" -s -v &
-elif [[ ! -z "${RPS_MAP}" ]]; then
-	eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
-	  ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
-	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
-	  -r "0x${RPS_MAP}" -s -v &
-else
-	ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
-	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
-fi
-
-server_pid=$!
-
-ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
-  "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
-
-client_pid=$!
-
-wait "${server_pid}"
-exit_code=$?
-kill -9 "${client_pid}"
-if [[ "${exit_code}" -eq 0 ]]; then
-	echo "Test Succeeded!"
-fi
-exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
deleted file mode 100755
index 2fef34f4aba1..000000000000
--- a/tools/testing/selftests/net/toeplitz_client.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# A simple program for generating traffic for the toeplitz test.
-#
-# This program sends packets periodically for, conservatively, 20 seconds. The
-# intent is for the calling program to kill this program once it is no longer
-# needed, rather than waiting for the 20 second expiration.
-
-send_traffic() {
-	expiration=$((SECONDS+20))
-	while [[ "${SECONDS}" -lt "${expiration}" ]]
-	do
-		if [[ "${PROTO}" == "-u" ]]; then
-			echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
-		else
-			echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
-		fi
-		sleep 0.001
-	done
-}
-
-PROTO=$1
-IPVER=$2
-ADDR=$3
-PORT=$4
-
-send_traffic
diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh
index dbb34c7e09ce..a7c6ab8a0347 100755
--- a/tools/testing/selftests/net/traceroute.sh
+++ b/tools/testing/selftests/net/traceroute.sh
@@ -36,6 +36,35 @@ run_cmd()
 	return $rc
 }
 
+# Succeed iff "$cmd -V" reports a dotted version >= req_ver (e.g. 2.1.5).
+__check_traceroute_version()
+{
+	local cmd=$1; shift
+	local req_ver=$1; shift
+	local ver
+
+	# First x.y.z token only, with literal dots; no version means too old.
+	ver=$($cmd -V 2>&1 | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+' | head -n 1)
+	[[ -n $ver ]] || return 1
+	# Compare per component: stripping the dots would rank 2.0.22 (2022)
+	# above 2.1.5 (215). sort -V lists the lower version first.
+	[[ $(printf '%s\n' "$req_ver" "$ver" | sort -V | head -n 1) == "$req_ver" ]]
+}
+
+check_traceroute6_version()
+{
+	local req_ver=$1; shift
+
+	__check_traceroute_version traceroute6 "$req_ver"
+}
+
+check_traceroute_version()
+{
+	local req_ver=$1; shift
+
+	__check_traceroute_version traceroute "$req_ver"
+}
+
 ################################################################################
 # create namespaces and interconnects
 
@@ -59,6 +88,8 @@ create_ns()
 	ip netns exec ${ns} ip -6 ro add unreachable default metric 8192
 
 	ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec ${ns} sysctl -qw net.ipv4.icmp_ratelimit=0
+	ip netns exec ${ns} sysctl -qw net.ipv6.icmp.ratelimit=0
 	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
 	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
 	ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
@@ -298,6 +329,144 @@ run_traceroute6_vrf()
 }
 
 ################################################################################
+# traceroute6 with ICMP extensions test
+#
+# Verify that in this scenario
+#
+# ----                          ----                          ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ----            N1            ----            N2            ----
+#
+# ICMP extensions are correctly reported. The loopback interfaces on all the
+# nodes are assigned global addresses and the interfaces connecting the nodes
+# are assigned IPv6 link-local addresses.
+
+cleanup_traceroute6_ext()
+{
+	cleanup_all_ns
+}
+
+setup_traceroute6_ext()
+{
+	# Start clean
+	cleanup_traceroute6_ext
+
+	setup_ns h1 r1 h2
+	create_ns "$h1"
+	create_ns "$r1"
+	create_ns "$h2"
+
+	# Setup N1
+	connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64
+	# Setup N2
+	connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64
+
+	# Setup H1
+	ip -n "$h1" address add 2001:db8:1::1/128 dev lo
+	ip -n "$h1" route add ::/0 nexthop via fe80::2 dev eth1
+
+	# Setup R1
+	ip -n "$r1" address add 2001:db8:1::2/128 dev lo
+	ip -n "$r1" route add 2001:db8:1::1/128 nexthop via fe80::1 dev eth1
+	ip -n "$r1" route add 2001:db8:1::3/128 nexthop via fe80::4 dev eth2
+
+	# Setup H2
+	ip -n "$h2" address add 2001:db8:1::3/128 dev lo
+	ip -n "$h2" route add ::/0 nexthop via fe80::3 dev eth2
+
+	# Prime the network
+	ip netns exec "$h1" ping6 -c5 2001:db8:1::3 >/dev/null 2>&1
+}
+
+traceroute6_ext_iio_iif_test()
+{
+	local r1_ifindex h2_ifindex
+	local pkt_len=$1; shift
+
+	# Test that incoming interface info is not appended by default.
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+	check_fail $? "Incoming interface info appended by default when should not"
+
+	# Test that the extension is appended when enabled.
+	run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+	check_err $? "Failed to enable incoming interface info extension on R1"
+
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+	check_err $? "Incoming interface info not appended after enable"
+
+	# Test that the extension is not appended when disabled.
+	run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+	check_err $? "Failed to disable incoming interface info extension on R1"
+
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+	check_fail $? "Incoming interface info appended after disable"
+
+	# Test that the extension is sent correctly from both R1 and H2.
+	run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01"
+	r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]')
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from R1"
+
+	run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01"
+	h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]')
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from H2"
+
+	# Add a global address on the incoming interface of R1 and check that
+	# it is reported.
+	run_cmd "$r1" "ip address add 2001:db8:100::1/64 dev eth1 nodad"
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,2001:db8:100::1,\"eth1\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from R1 after address addition"
+	run_cmd "$r1" "ip address del 2001:db8:100::1/64 dev eth1"
+
+	# Change name and MTU and make sure the result is still correct.
+	run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501"
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'"
+	check_err $? "Wrong incoming interface info reported from R1 after name and MTU change"
+	run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500"
+
+	run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00"
+	run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00"
+}
+
+run_traceroute6_ext()
+{
+	# Need at least version 2.1.5 for RFC 5837 support.
+	if ! check_traceroute6_version 2.1.5; then
+		log_test_skip "traceroute6 too old, missing ICMP extensions support"
+		return
+	fi
+
+	setup_traceroute6_ext
+
+	RET=0
+
+	## General ICMP extensions tests
+
+	# Test that ICMP extensions are disabled by default.
+	run_cmd "$h1" "sysctl net.ipv6.icmp.errors_extension_mask | grep \"= 0$\""
+	check_err $? "ICMP extensions are not disabled by default"
+
+	# Test that unsupported values are rejected. Do not use "sysctl" as
+	# older versions do not return an error code upon failure.
+	run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+	check_fail $? "Unsupported sysctl value was not rejected"
+
+	## Extension-specific tests
+
+	# Incoming interface info test. Test with various packet sizes,
+	# including the default one.
+	traceroute6_ext_iio_iif_test
+	traceroute6_ext_iio_iif_test 127
+	traceroute6_ext_iio_iif_test 128
+	traceroute6_ext_iio_iif_test 129
+
+	log_test "IPv6 traceroute with ICMP extensions"
+
+	cleanup_traceroute6_ext
+}
+
+################################################################################
 # traceroute test
 #
 # Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when
@@ -438,14 +607,157 @@ run_traceroute_vrf()
 }
 
 ################################################################################
+# traceroute with ICMP extensions test
+#
+# Verify that in this scenario
+#
+# ----                          ----                          ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ----            N1            ----            N2            ----
+#
+# ICMP extensions are correctly reported. The loopback interfaces on all the
+# nodes are assigned global addresses and the interfaces connecting the nodes
+# are assigned IPv6 link-local addresses.
+
+cleanup_traceroute_ext()
+{
+	cleanup_all_ns
+}
+
+setup_traceroute_ext()
+{
+	# Start clean
+	cleanup_traceroute_ext
+
+	setup_ns h1 r1 h2
+	create_ns "$h1"
+	create_ns "$r1"
+	create_ns "$h2"
+
+	# Setup N1
+	connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64
+	# Setup N2
+	connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64
+
+	# Setup H1
+	ip -n "$h1" address add 192.0.2.1/32 dev lo
+	ip -n "$h1" route add 0.0.0.0/0 nexthop via inet6 fe80::2 dev eth1
+
+	# Setup R1
+	ip -n "$r1" address add 192.0.2.2/32 dev lo
+	ip -n "$r1" route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1
+	ip -n "$r1" route add 192.0.2.3/32 nexthop via inet6 fe80::4 dev eth2
+
+	# Setup H2
+	ip -n "$h2" address add 192.0.2.3/32 dev lo
+	ip -n "$h2" route add 0.0.0.0/0 nexthop via inet6 fe80::3 dev eth2
+
+	# Prime the network
+	ip netns exec "$h1" ping -c5 192.0.2.3 >/dev/null 2>&1
+}
+
+traceroute_ext_iio_iif_test()
+{
+	local r1_ifindex h2_ifindex
+	local pkt_len=$1; shift
+
+	# Test that incoming interface info is not appended by default.
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+	check_fail $? "Incoming interface info appended by default when should not"
+
+	# Test that the extension is appended when enabled.
+	run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+	check_err $? "Failed to enable incoming interface info extension on R1"
+
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+	check_err $? "Incoming interface info not appended after enable"
+
+	# Test that the extension is not appended when disabled.
+	run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+	check_err $? "Failed to disable incoming interface info extension on R1"
+
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+	check_fail $? "Incoming interface info appended after disable"
+
+	# Test that the extension is sent correctly from both R1 and H2.
+	run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01"
+	r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]')
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from R1"
+
+	run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01"
+	h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]')
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from H2"
+
+	# Add a global address on the incoming interface of R1 and check that
+	# it is reported.
+	run_cmd "$r1" "ip address add 198.51.100.1/24 dev eth1"
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,198.51.100.1,\"eth1\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from R1 after address addition"
+	run_cmd "$r1" "ip address del 198.51.100.1/24 dev eth1"
+
+	# Change name and MTU and make sure the result is still correct.
+	# Re-add the route towards H1 since it was deleted when we removed the
+	# last IPv4 address from eth1 on R1.
+	run_cmd "$r1" "ip route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1"
+	run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501"
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'"
+	check_err $? "Wrong incoming interface info reported from R1 after name and MTU change"
+	run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500"
+
+	run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00"
+	run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00"
+}
+
+run_traceroute_ext()
+{
+	# Need at least version 2.1.5 for RFC 5837 support.
+	if ! check_traceroute_version 2.1.5; then
+		log_test_skip "traceroute too old, missing ICMP extensions support"
+		return
+	fi
+
+	setup_traceroute_ext
+
+	RET=0
+
+	## General ICMP extensions tests
+
+	# Test that ICMP extensions are disabled by default.
+	run_cmd "$h1" "sysctl net.ipv4.icmp_errors_extension_mask | grep \"= 0$\""
+	check_err $? "ICMP extensions are not disabled by default"
+
+	# Test that unsupported values are rejected. Do not use "sysctl" as
+	# older versions do not return an error code upon failure.
+	run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+	check_fail $? "Unsupported sysctl value was not rejected"
+
+	## Extension-specific tests
+
+	# Incoming interface info test. Test with various packet sizes,
+	# including the default one.
+	traceroute_ext_iio_iif_test
+	traceroute_ext_iio_iif_test 127
+	traceroute_ext_iio_iif_test 128
+	traceroute_ext_iio_iif_test 129
+
+	log_test "IPv4 traceroute with ICMP extensions"
+
+	cleanup_traceroute_ext
+}
+
+################################################################################
 # Run tests
 
 run_tests()
 {
 	run_traceroute6
 	run_traceroute6_vrf
+	run_traceroute6_ext
 	run_traceroute
 	run_traceroute_vrf
+	run_traceroute_ext
 }
 
 ################################################################################
@@ -462,6 +774,7 @@ done
 
 require_command traceroute6
 require_command traceroute
+require_command jq
 
 run_tests
 
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index dae91eb97d69..bcc14688661d 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -217,7 +217,7 @@ static void print_timestamp_usr(void)
 static void print_timestamp(struct scm_timestamping *tss, int tstype,
 			    int tskey, int payload_len)
 {
-	const char *tsname;
+	const char *tsname = NULL;
 
 	validate_key(tskey, tstype);
 
diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh
index db481af9b6b3..e8c02c64e03a 100755
--- a/tools/testing/selftests/net/vlan_bridge_binding.sh
+++ b/tools/testing/selftests/net/vlan_bridge_binding.sh
@@ -249,6 +249,8 @@ test_binding_toggle_off_when_upper_down()
 	do_test_binding_off : "on->off when upper down"
 }
 
+require_command jq
+
 trap defer_scopes_cleanup EXIT
 setup_prepare
 tests_run
diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc
index 330e000baeb1..f9d43cbdc894 100644
--- a/tools/testing/selftests/nolibc/Makefile.nolibc
+++ b/tools/testing/selftests/nolibc/Makefile.nolibc
@@ -87,7 +87,6 @@ IMAGE_riscv      = arch/riscv/boot/Image
 IMAGE_riscv32    = arch/riscv/boot/Image
 IMAGE_riscv64    = arch/riscv/boot/Image
 IMAGE_s390x      = arch/s390/boot/bzImage
-IMAGE_s390       = arch/s390/boot/bzImage
 IMAGE_loongarch  = arch/loongarch/boot/vmlinuz.efi
 IMAGE_sparc32    = arch/sparc/boot/image
 IMAGE_sparc64    = arch/sparc/boot/image
@@ -117,7 +116,6 @@ DEFCONFIG_riscv      = defconfig
 DEFCONFIG_riscv32    = rv32_defconfig
 DEFCONFIG_riscv64    = defconfig
 DEFCONFIG_s390x      = defconfig
-DEFCONFIG_s390       = defconfig compat.config
 DEFCONFIG_loongarch  = defconfig
 DEFCONFIG_sparc32    = sparc32_defconfig
 DEFCONFIG_sparc64    = sparc64_defconfig
@@ -156,7 +154,6 @@ QEMU_ARCH_riscv      = riscv64
 QEMU_ARCH_riscv32    = riscv32
 QEMU_ARCH_riscv64    = riscv64
 QEMU_ARCH_s390x      = s390x
-QEMU_ARCH_s390       = s390x
 QEMU_ARCH_loongarch  = loongarch64
 QEMU_ARCH_sparc32    = sparc
 QEMU_ARCH_sparc64    = sparc64
@@ -197,7 +194,6 @@ QEMU_ARGS_riscv      = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_T
 QEMU_ARGS_riscv32    = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_riscv64    = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_s390x      = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
-QEMU_ARGS_s390       = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_loongarch  = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_sparc32    = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_sparc64    = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
@@ -223,13 +219,13 @@ CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
 CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
 CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
 CFLAGS_s390x = -m64
-CFLAGS_s390 = -m31
 CFLAGS_mips32le = -EL -mabi=32 -fPIC
 CFLAGS_mips32be = -EB -mabi=32
 CFLAGS_mipsn32le = -EL -mabi=n32 -fPIC -march=mips64r2
 CFLAGS_mipsn32be = -EB -mabi=n32 -march=mips64r6
 CFLAGS_mips64le = -EL -mabi=64 -march=mips64r6
 CFLAGS_mips64be = -EB -mabi=64 -march=mips64r2
+CFLAGS_loongarch = $(if $(LLVM),-fuse-ld=lld)
 CFLAGS_sparc32 = $(call cc-option,-m32)
 CFLAGS_sh4 = -ml -m4
 ifeq ($(origin XARCH),command line)
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 29de21595fc9..3c5a226dad3a 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -25,6 +25,7 @@
 #include <sys/sysmacros.h>
 #include <sys/time.h>
 #include <sys/timerfd.h>
+#include <sys/uio.h>
 #include <sys/utsname.h>
 #include <sys/wait.h>
 #include <dirent.h>
@@ -1282,6 +1283,10 @@ int run_syscall(int min, int max)
 	int proc;
 	int test;
 	int tmp;
+	struct iovec iov_one = {
+		.iov_base = &tmp,
+		.iov_len = 1,
+	};
 	int ret = 0;
 	void *p1, *p2;
 	int has_gettid = 1;
@@ -1343,6 +1348,8 @@ int run_syscall(int min, int max)
 		CASE_TEST(dup3_0);            tmp = dup3(0, 100, 0);  EXPECT_SYSNE(1, tmp, -1); close(tmp); break;
 		CASE_TEST(dup3_m1);           tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break;
 		CASE_TEST(execve_root);       EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break;
+		CASE_TEST(fchdir_stdin);      EXPECT_SYSER(1, fchdir(STDIN_FILENO), -1, ENOTDIR); break;
+		CASE_TEST(fchdir_badfd);      EXPECT_SYSER(1, fchdir(-1), -1, EBADF); break;
 		CASE_TEST(file_stream);       EXPECT_SYSZR(1, test_file_stream()); break;
 		CASE_TEST(fork);              EXPECT_SYSZR(1, test_fork(FORK_STANDARD)); break;
 		CASE_TEST(getdents64_root);   EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
@@ -1395,6 +1402,10 @@ int run_syscall(int min, int max)
 		CASE_TEST(waitpid_child);     EXPECT_SYSER(1, waitpid(getpid(), &tmp, WNOHANG), -1, ECHILD); break;
 		CASE_TEST(write_badf);        EXPECT_SYSER(1, write(-1, &tmp, 1), -1, EBADF); break;
 		CASE_TEST(write_zero);        EXPECT_SYSZR(1, write(1, &tmp, 0)); break;
+		CASE_TEST(readv_badf);        EXPECT_SYSER(1, readv(-1, &iov_one, 1), -1, EBADF); break;
+		CASE_TEST(readv_zero);        EXPECT_SYSZR(1, readv(1, NULL, 0)); break;
+		CASE_TEST(writev_badf);       EXPECT_SYSER(1, writev(-1, &iov_one, 1), -1, EBADF); break;
+		CASE_TEST(writev_zero);       EXPECT_SYSZR(1, writev(1, NULL, 0)); break;
 		CASE_TEST(syscall_noargs);    EXPECT_SYSEQ(1, syscall(__NR_getpid), getpid()); break;
 		CASE_TEST(syscall_args);      EXPECT_SYSER(1, syscall(__NR_statx, 0, NULL, 0, 0, NULL), -1, EFAULT); break;
 		CASE_TEST(namespace);         EXPECT_SYSZR(euid0 && proc, test_namespace()); break;
@@ -1540,6 +1551,8 @@ int run_stdlib(int min, int max)
 		CASE_TEST(abs);                     EXPECT_EQ(1, abs(-10), 10); break;
 		CASE_TEST(abs_noop);                EXPECT_EQ(1, abs(10), 10); break;
 		CASE_TEST(difftime);                EXPECT_ZR(1, test_difftime()); break;
+		CASE_TEST(memchr_foobar6_o);        EXPECT_STREQ(1, memchr("foobar", 'o', 6), "oobar"); break;
+		CASE_TEST(memchr_foobar3_b);        EXPECT_STRZR(1, memchr("foobar", 'b', 3)); break;
 
 		case __LINE__:
 			return ret; /* must be last */
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
index e8af1fb505cf..3917cfb8fdc4 100755
--- a/tools/testing/selftests/nolibc/run-tests.sh
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -23,7 +23,7 @@ all_archs=(
 	mips32le mips32be mipsn32le mipsn32be mips64le mips64be
 	ppc ppc64 ppc64le
 	riscv32 riscv64
-	s390x s390
+	s390x
 	loongarch
 	sparc32 sparc64
 	m68k
@@ -169,7 +169,7 @@ test_arch() {
 	cross_compile=$(realpath "${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}/bin/${ct_arch}-${ct_abi}-")
 	build_dir="${build_location}/${arch}"
 	if [ "$werror" -ne 0 ]; then
-		CFLAGS_EXTRA="$CFLAGS_EXTRA -Werror"
+		CFLAGS_EXTRA="$CFLAGS_EXTRA -Werror -Wl,--fatal-warnings"
 	fi
 	MAKE=(make -f Makefile.nolibc -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}")
 
@@ -185,10 +185,6 @@ test_arch() {
 			exit 1
 	esac
 	printf '%-15s' "$arch:"
-	if [ "$arch" = "s390" ] && ([ "$llvm" = "1" ] || [ "$test_mode" = "user" ]); then
-		echo "Unsupported configuration"
-		return
-	fi
 	if [ "$arch" = "m68k" -o "$arch" = "sh4" ] && [ "$llvm" = "1" ]; then
 		echo "Unsupported configuration"
 		return
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index f87993def738..d60f10a873bb 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -148,6 +148,14 @@
 #define PIDFD_INFO_COREDUMP	(1UL << 4)
 #endif
 
+#ifndef PIDFD_INFO_SUPPORTED_MASK
+#define PIDFD_INFO_SUPPORTED_MASK	(1UL << 5)
+#endif
+
+#ifndef PIDFD_INFO_COREDUMP_SIGNAL
+#define PIDFD_INFO_COREDUMP_SIGNAL	(1UL << 6)
+#endif
+
 #ifndef PIDFD_COREDUMPED
 #define PIDFD_COREDUMPED	(1U << 0) /* Did crash and... */
 #endif
@@ -183,8 +191,11 @@ struct pidfd_info {
 	__u32 fsuid;
 	__u32 fsgid;
 	__s32 exit_code;
-	__u32 coredump_mask;
-	__u32 __spare1;
+	struct {
+		__u32 coredump_mask;
+		__u32 coredump_signal;
+	};
+	__u64 supported_mask;
 };
 
 /*
diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c
index a0eb6e81eaa2..cb5430a2fd75 100644
--- a/tools/testing/selftests/pidfd/pidfd_info_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_info_test.c
@@ -690,4 +690,77 @@ TEST_F(pidfd_info, thread_group_exec_thread)
 	EXPECT_EQ(close(pidfd_thread), 0);
 }
 
+/*
+ * Test: PIDFD_INFO_SUPPORTED_MASK field
+ *
+ * Request only PIDFD_INFO_SUPPORTED_MASK via PIDFD_GET_INFO and check that the
+ * reply sets that bit and that supported_mask lists every flag tested below.
+ */
+TEST(supported_mask_field)
+{
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_SUPPORTED_MASK,
+	};
+	int pidfd;
+	pid_t pid;
+
+	pid = create_child(&pidfd, 0);
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0)
+		pause(); /* presumably the child: sleep until the SIGKILL sent below */
+
+	/* Request supported_mask field */
+	ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+
+	/* Verify PIDFD_INFO_SUPPORTED_MASK is set in the reply */
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_SUPPORTED_MASK));
+
+	/* Verify supported_mask advertises every flag this test knows about */
+	ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_PID));
+	ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_CREDS));
+	ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_CGROUPID));
+	ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_EXIT));
+	ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP));
+	ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_SUPPORTED_MASK));
+	ASSERT_TRUE(!!(info.supported_mask & PIDFD_INFO_COREDUMP_SIGNAL));
+
+	/* Clean up: kill the child, wait for it to exit, then close the pidfd */
+	sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
+	sys_waitid(P_PIDFD, pidfd, NULL, WEXITED);
+	close(pidfd);
+}
+
+/*
+ * Test: PIDFD_INFO_SUPPORTED_MASK combined with another field
+ *
+ * Verify that supported_mask is returned even when other fields are requested.
+ */
+TEST(supported_mask_with_other_fields)
+{
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_SUPPORTED_MASK,
+	};
+	int pidfd;
+	pid_t pid;
+
+	pid = create_child(&pidfd, 0);
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0)
+		pause(); /* presumably the child: sleep until the SIGKILL sent below */
+
+	ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+
+	/* Both requested bits must be set in the reply, with a non-empty mask */
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CGROUPID));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_SUPPORTED_MASK));
+	ASSERT_NE(info.supported_mask, 0);
+
+	/* Clean up: kill the child, wait for it to exit, then close the pidfd */
+	sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
+	sys_waitid(P_PIDFD, pidfd, NULL, WEXITED);
+	close(pidfd);
+}
+
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 88ca4e368489..b5239b52cb5d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -31,7 +31,7 @@ fi
 if ! cp "$oldrun/scenarios" $T/scenarios.oldrun
 then
 	# Later on, can reconstitute this from console.log files.
-	echo Prior run batches file does not exist: $oldrun/batches
+	echo Prior run scenarios file does not exist: $oldrun/scenarios
 	exit 1
 fi
 
@@ -68,7 +68,7 @@ usage () {
 	echo "       --datestamp string"
 	echo "       --dryrun"
 	echo "       --duration minutes | <seconds>s | <hours>h | <days>d"
-	echo "       --link hard|soft|copy"
+	echo "       --link hard|soft|copy|inplace|inplace-force"
 	echo "       --remote"
 	echo "       --rundir /new/res/path"
 	echo "Command line: $scriptname $args"
@@ -121,7 +121,7 @@ do
 		shift
 		;;
 	--link)
-		checkarg --link "hard|soft|copy" "$#" "$2" 'hard\|soft\|copy' '^--'
+		checkarg --link "hard|soft|copy|inplace|inplace-force" "$#" "$2" 'hard\|soft\|copy\|inplace\|inplace-force' '^--'
 		case "$2" in
 		copy)
 			arg_link="cp -R"
@@ -132,6 +132,14 @@ do
 		soft)
 			arg_link="cp -Rs"
 			;;
+		inplace)
+			arg_link="inplace"
+			rundir="$oldrun"
+			;;
+		inplace-force)
+			arg_link="inplace-force"
+			rundir="$oldrun"
+			;;
 		esac
 		shift
 		;;
@@ -172,21 +180,37 @@ fi
 
 echo ---- Re-run results directory: $rundir
 
-# Copy old run directory tree over and adjust.
-mkdir -p "`dirname "$rundir"`"
-if ! $arg_link "$oldrun" "$rundir"
-then
-	echo "Cannot copy from $oldrun to $rundir."
-	usage
-fi
-rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
-touch "$rundir/log"
-echo $scriptname $args | tee -a "$rundir/log"
-echo $oldrun > "$rundir/re-run"
-if ! test -d "$rundir/../../bin"
+if test "$oldrun" != "$rundir"
 then
-	$arg_link "$oldrun/../../bin" "$rundir/../.."
+	# Copy old run directory tree over and adjust.
+	mkdir -p "`dirname "$rundir"`"
+	if ! $arg_link "$oldrun" "$rundir"
+	then
+		echo "Cannot copy from $oldrun to $rundir."
+		usage
+	fi
+	rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
+	touch "$rundir/log"
+	echo $scriptname $args | tee -a "$rundir/log"
+	echo $oldrun > "$rundir/re-run"
+	if ! test -d "$rundir/../../bin"
+	then
+		$arg_link "$oldrun/../../bin" "$rundir/../.."
+	fi
+else
+	# Check for a run having already happened.
+	find "$rundir" -name console.log -print > $T/oldrun-console.log
+	if test -s $T/oldrun-console.log
+	then
+		echo Run already took place in $rundir
+		if test "$arg_link" = inplace
+		then
+			usage
+		fi
+	fi
 fi
+
+# Find runs to be done based on their qemu-cmd files.
 for i in $rundir/*/qemu-cmd
 do
 	cp "$i" $T
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-series.sh b/tools/testing/selftests/rcutorture/bin/kvm-series.sh
new file mode 100755
index 000000000000..2ff905a1853b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-series.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Usage: kvm-series.sh config-list commit-id-list [ kvm.sh parameters ]
+#
+# Takes the specified list of unadorned configs ("TREE01 SRCU-P" but not
+# "CFLIST" or "3*TRACE01") and an indication of a set of commits to test,
+# then runs each commit through the specified list of configs using kvm.sh.
+# The runs are grouped into a -series/config/commit directory tree.
+# Each run defaults to a duration of one minute.
+#
+# Run in top-level Linux source directory.  Please note that this is in
+# no way a replacement for "git bisect"!!!
+#
+# This script is intended to replace kvm-check-branches.sh by providing
+# ease of use and faster execution.
+
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-series.sh.XXXXXX`"
+trap 'rm -rf $T' 0
+
+scriptname=$0
+args="$*"
+
+config_list="${1}" # Quoted, space-separated list of scenario names.
+if test -z "${config_list}"
+then
+	echo "$0: Need a quoted list of --config arguments for first argument."
+	exit 1
+fi
+if echo "${config_list}" | grep -q '\*' # Emptiness was already rejected above.
+then
+	echo "$0: Repetition ('*') not allowed in config list."
+	exit 1
+fi
+
+commit_list="${2}"
+if test -z "${commit_list}"
+then
+	echo "$0: Need a list of commits (e.g., HEAD^^^..) for second argument."
+	exit 2
+fi
+git log --pretty=format:"%h" "${commit_list}" > $T/commits
+ret=$?
+if test "${ret}" -ne 0
+then
+	echo "$0: Invalid commit list ('${commit_list}')."
+	exit 2
+fi
+sha1_list=`cat $T/commits`
+
+shift
+shift
+
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
+. functions.sh
+
+ret=0 # First non-zero kvm.sh exit status, returned at the end.
+nfail=0
+nsuccess=0
+faillist=
+successlist=
+cursha1="`git symbolic-ref -q --short HEAD || git rev-parse HEAD`" # Branch name, or SHA if HEAD is detached.
+ds="`date +%Y.%m.%d-%H.%M.%S`-series"
+startdate="`date`"
+starttime="`get_starttime`"
+
+echo " --- " $scriptname $args | tee -a $T/log
+echo " --- Results directory: " $ds | tee -a $T/log
+
+for config in ${config_list}
+do
+	sha_n=0
+	for sha in ${sha1_list}
+	do
+		sha1=${sha_n}.${sha} # Enable "sort -k1nr" to list commits in order.
+		echo Starting ${config}/${sha1} at `date` | tee -a $T/log
+		git checkout "${sha}"
+		time tools/testing/selftests/rcutorture/bin/kvm.sh --configs "$config" --datestamp "$ds/${config}/${sha1}" --duration 1 "$@"
+		curret=$?
+		if test "${curret}" -ne 0
+		then
+			nfail=$((nfail+1))
+			faillist="$faillist ${config}/${sha1}(${curret})"
+		else
+			nsuccess=$((nsuccess+1))
+			successlist="$successlist ${config}/${sha1}"
+			# Successful run, so remove large files from the per-scenario directories under res/.
+			rm -f ${RCUTORTURE}/res/$ds/${config}/${sha1}/*/{vmlinux,bzImage,System.map,Module.symvers}
+		fi
+		if test "${ret}" -eq 0
+		then
+			ret=${curret} # Keep only the first failing status.
+		fi
+		sha_n=$((sha_n+1))
+	done
+done
+git checkout "${cursha1}" # Return to the checkout recorded before the series.
+
+echo ${nsuccess} SUCCESSES: | tee -a $T/log
+echo ${successlist} | fmt | tee -a $T/log
+echo | tee -a $T/log
+echo ${nfail} FAILURES: | tee -a $T/log
+echo ${faillist} | fmt | tee -a $T/log
+if test -n "${faillist}"
+then
+	echo | tee -a $T/log
+	echo Failures across commits: | tee -a $T/log
+	echo ${faillist} | tr ' ' '\012' | sed -e 's,^[^/]*/,,' -e 's/([0-9]*)//' |
+		sort | uniq -c | sort -k2n | tee -a $T/log
+fi
+echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
+echo Summary: Successes: ${nsuccess} Failures: ${nfail} | tee -a $T/log
+cp $T/log tools/testing/selftests/rcutorture/res/${ds}
+
+exit "${ret}"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 617cba339d28..fff15821c44c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -199,7 +199,7 @@ do
 		fi
 		;;
 	--kconfig|--kconfigs)
-		checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \+\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)* *$' '^error$'
+		checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|-\?[0-9]\+\|"[^"]*"\)\( \+\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|-\?[0-9]\+\|"[^"]*"\)\)* *$' '^error$'
 		TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
 		shift
 		;;
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index dc4985064b3a..67caf4276bb0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -16,3 +16,4 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
 CONFIG_RCU_EQS_DEBUG=y
 CONFIG_RCU_LAZY=y
+CONFIG_RCU_DYNTICKS_TORTURE=y
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
index 33baaa9f9997..e7b858cd3736 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -28,8 +28,6 @@ do {									\
 	RSEQ_WRITE_ONCE(*(p), v);					\
 } while (0)
 
-#ifdef __s390x__
-
 #define LONG_L			"lg"
 #define LONG_S			"stg"
 #define LONG_LT_R		"ltgr"
@@ -63,43 +61,6 @@ do {									\
 		".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n\t" \
 		".popsection\n\t"
 
-#elif __s390__
-
-#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\
-				start_ip, post_commit_offset, abort_ip)	\
-		".pushsection __rseq_cs, \"aw\"\n\t"			\
-		".balign 32\n\t"					\
-		__rseq_str(label) ":\n\t"				\
-		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
-		".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
-		".popsection\n\t"					\
-		".pushsection __rseq_cs_ptr_array, \"aw\"\n\t"		\
-		".long 0x0, " __rseq_str(label) "b\n\t"			\
-		".popsection\n\t"
-
-/*
- * Exit points of a rseq critical section consist of all instructions outside
- * of the critical section where a critical section can either branch to or
- * reach through the normal course of its execution. The abort IP and the
- * post-commit IP are already part of the __rseq_cs section and should not be
- * explicitly defined as additional exit points. Knowing all exit points is
- * useful to assist debuggers stepping over the critical section.
- */
-#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip)			\
-		".pushsection __rseq_exit_point_array, \"aw\"\n\t"	\
-		".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) "\n\t" \
-		".popsection\n\t"
-
-#define LONG_L			"l"
-#define LONG_S			"st"
-#define LONG_LT_R		"ltr"
-#define LONG_CMP		"c"
-#define LONG_CMP_R		"cr"
-#define LONG_ADDI		"ahi"
-#define LONG_ADD_R		"ar"
-
-#endif
-
 #define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
 	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		\
 				(post_commit_ip - start_ip), abort_ip)
diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
index 0443beacf362..d4be97498b32 100755
--- a/tools/testing/selftests/run_kselftest.sh
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -33,6 +33,7 @@ Usage: $0 [OPTIONS]
   -c | --collection COLLECTION	Run all tests from COLLECTION
   -l | --list			List the available collection:test entries
   -d | --dry-run		Don't actually run any tests
+  -f | --no-error-on-fail	Don't exit with an error just because tests failed
   -n | --netns			Run each test in namespace
   -h | --help			Show this usage info
   -o | --override-timeout	Number of seconds after which we timeout
@@ -44,6 +45,7 @@ COLLECTIONS=""
 TESTS=""
 dryrun=""
 kselftest_override_timeout=""
+ERROR_ON_FAIL=true
 while true; do
 	case "$1" in
 		-s | --summary)
@@ -65,6 +67,9 @@ while true; do
 		-d | --dry-run)
 			dryrun="echo"
 			shift ;;
+		-f | --no-error-on-fail)
+			ERROR_ON_FAIL=false
+			shift ;;
 		-n | --netns)
 			RUN_IN_NETNS=1
 			shift ;;
@@ -105,9 +110,18 @@ if [ -n "$TESTS" ]; then
 	available="$(echo "$valid" | sed -e 's/ /\n/g')"
 fi
 
+kselftest_failures_file="$(mktemp --tmpdir kselftest-failures-XXXXXX)"
+export kselftest_failures_file
+
 collections=$(echo "$available" | cut -d: -f1 | sort | uniq)
 for collection in $collections ; do
 	[ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg
 	tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2)
 	($dryrun cd "$collection" && $dryrun run_many $tests)
 done
+
+failures="$(cat "$kselftest_failures_file")"
+rm "$kselftest_failures_file"
+if "$ERROR_ON_FAIL" && [ "$failures" ]; then
+	exit 1
+fi
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index 9d9d6b4c38b0..5fe45f9c5f8f 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -174,6 +174,7 @@ auto-test-targets :=			\
 	minimal				\
 	numa				\
 	allowed_cpus			\
+	peek_dsq			\
 	prog_run			\
 	reload_loop			\
 	select_cpu_dfl			\
diff --git a/tools/testing/selftests/sched_ext/peek_dsq.bpf.c b/tools/testing/selftests/sched_ext/peek_dsq.bpf.c
new file mode 100644
index 000000000000..a3faf5bb49d6
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/peek_dsq.bpf.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A BPF program for testing DSQ operations and peek in particular.
+ *
+ * Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2025 Ryan Newton <ryan.newton@alum.mit.edu>
+ */
+
+#include <scx/common.bpf.h>
+#include <scx/compat.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+UEI_DEFINE(uei); /* Error handling */
+
+#define MAX_SAMPLES 100
+#define MAX_CPUS 512
+#define DSQ_POOL_SIZE 8
+int max_samples = MAX_SAMPLES;
+int max_cpus = MAX_CPUS;
+int dsq_pool_size = DSQ_POOL_SIZE;
+
+/* Global variables to store test results */
+int dsq_peek_result1 = -1;
+long dsq_inserted_pid = -1;
+int insert_test_cpu = -1; /* Set to the cpu that performs the test */
+long dsq_peek_result2 = -1;
+long dsq_peek_result2_pid = -1;
+long dsq_peek_result2_expected = -1;
+int test_dsq_id = 1234; /* Use a simple ID like create_dsq example */
+int real_dsq_id = 1235; /* DSQ for normal operation */
+int enqueue_count = -1;
+int dispatch_count = -1;
+bool debug_ksym_exists;
+
+/* DSQ pool for stress testing */
+int dsq_pool_base_id = 2000;
+int phase1_complete = -1;
+long total_peek_attempts = -1;
+long successful_peeks = -1;
+
+/* BPF map for sharing peek results with userspace */
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, MAX_SAMPLES);
+	__type(key, u32);
+	__type(value, long);
+} peek_results SEC(".maps");
+
+static int get_random_dsq_id(void)
+{
+	u64 time = bpf_ktime_get_ns(); /* clock value stands in for a random number */
+
+	return dsq_pool_base_id + (time % DSQ_POOL_SIZE); /* one of DSQ_POOL_SIZE consecutive ids */
+}
+
+static void record_peek_result(long pid)
+{
+	u32 slot_key;
+	long *slot_pid_ptr;
+	int ix;
+
+	if (pid <= 0)
+		return; /* only positive pids are recorded */
+
+	/* Probe up to 10 slots from pid % MAX_SAMPLES for an empty (-1) or same-PID one */
+	bpf_for(ix, 0, 10) {
+		slot_key = (pid + ix) % MAX_SAMPLES;
+		slot_pid_ptr = bpf_map_lookup_elem(&peek_results, &slot_key);
+		if (!slot_pid_ptr)
+			continue;
+
+		if (*slot_pid_ptr == -1 || *slot_pid_ptr == pid) {
+			*slot_pid_ptr = pid;
+			break;
+		}
+	} /* if every probed slot holds another pid, this sample is dropped */
+}
+
+/* Scan all DSQs in the pool; return 1 once a task was moved to local, else 0 */
+static int scan_dsq_pool(void)
+{
+	struct task_struct *task;
+	int moved = 0;
+	int i;
+
+	bpf_for(i, 0, DSQ_POOL_SIZE) {
+		int dsq_id = dsq_pool_base_id + i;
+
+		total_peek_attempts++;
+
+		task = __COMPAT_scx_bpf_dsq_peek(dsq_id);
+		if (task) {
+			successful_peeks++;
+			record_peek_result(task->pid);
+
+			/* Returns true if a task was moved; keep scanning otherwise */
+			if (scx_bpf_dsq_move_to_local(dsq_id)) {
+				moved = 1;
+				break;
+			}
+		}
+	}
+	return moved;
+}
+
+/* Struct_ops scheduler for testing DSQ peek operations */
+void BPF_STRUCT_OPS(peek_dsq_enqueue, struct task_struct *p, u64 enq_flags)
+{
+	struct task_struct *peek_result;
+	int last_insert_test_cpu, cpu;
+
+	enqueue_count++;
+	cpu = bpf_get_smp_processor_id();
+	last_insert_test_cpu = __sync_val_compare_and_swap(&insert_test_cpu, -1, cpu);
+
+	/* Phase 1: Simple insert-then-peek test (only the enqueue that won the cmpxchg) */
+	if (last_insert_test_cpu == -1) {
+		bpf_printk("peek_dsq_enqueue beginning phase 1 peek test on cpu %d", cpu);
+
+		/* Test 1: Peek empty DSQ - should return NULL */
+		peek_result = __COMPAT_scx_bpf_dsq_peek(test_dsq_id);
+		dsq_peek_result1 = (long)peek_result; /* Should be 0 (NULL); NOTE(review): int global truncates the pointer */
+
+		/* Test 2: Insert task into test DSQ for testing in dispatch callback */
+		dsq_inserted_pid = p->pid;
+		scx_bpf_dsq_insert(p, test_dsq_id, 0, enq_flags);
+		dsq_peek_result2_expected = (long)p; /* Expected the task we just inserted */
+	} else if (!phase1_complete) {
+		/* Still in phase 1, use real DSQ */
+		scx_bpf_dsq_insert(p, real_dsq_id, 0, enq_flags);
+	} else {
+		/* Phase 2: Insert into a clock-selected pool DSQ for stress testing */
+		int random_dsq_id = get_random_dsq_id();
+
+		scx_bpf_dsq_insert(p, random_dsq_id, 0, enq_flags);
+	}
+}
+
+void BPF_STRUCT_OPS(peek_dsq_dispatch, s32 cpu, struct task_struct *prev)
+{
+	dispatch_count++;
+
+	/* Phase 1: Complete the simple peek test if we inserted a task but
+	 * haven't tested peek yet (dsq_peek_result2 still holds its -1 initial value)
+	 */
+	if (insert_test_cpu == cpu && dsq_peek_result2 == -1) {
+		struct task_struct *peek_result;
+
+		bpf_printk("peek_dsq_dispatch completing phase 1 peek test on cpu %d", cpu);
+
+		/* Test 3: Peek DSQ after insert - should return the task we inserted */
+		peek_result = __COMPAT_scx_bpf_dsq_peek(test_dsq_id);
+		/* Store both the raw pointer and the PID of the peeked task for comparison */
+		dsq_peek_result2 = (long)peek_result;
+		dsq_peek_result2_pid = peek_result ? peek_result->pid : -1;
+
+		/* Now consume the task since we've peeked at it */
+		scx_bpf_dsq_move_to_local(test_dsq_id);
+
+		/* Mark phase 1 as complete */
+		phase1_complete = 1;
+		bpf_printk("Phase 1 complete, starting phase 2 stress testing");
+	} else if (!phase1_complete) {
+		/* Still in phase 1, use real DSQ */
+		scx_bpf_dsq_move_to_local(real_dsq_id);
+	} else {
+		/* Phase 2: Scan all DSQs in the pool and try to move a task */
+		if (!scan_dsq_pool()) {
+			/* scan_dsq_pool() reported no move, fall back to real DSQ */
+			scx_bpf_dsq_move_to_local(real_dsq_id);
+		}
+	}
+}
+
+s32 BPF_STRUCT_OPS_SLEEPABLE(peek_dsq_init)
+{
+	s32 err;
+	int i;
+
+	/* Always set debug values so we can see which version we're using */
+	debug_ksym_exists = bpf_ksym_exists(scx_bpf_dsq_peek) ? 1 : 0;
+
+	/* Initialize state first: counters to zero, no phase-1 CPU claimed yet */
+	insert_test_cpu = -1;
+	enqueue_count = 0;
+	dispatch_count = 0;
+	phase1_complete = 0;
+	total_peek_attempts = 0;
+	successful_peeks = 0;
+
+	/* Create the test and real DSQs; any failure aborts init with that error */
+	err = scx_bpf_create_dsq(test_dsq_id, -1);
+	if (err) {
+		scx_bpf_error("Failed to create DSQ %d: %d", test_dsq_id, err);
+		return err;
+	}
+	err = scx_bpf_create_dsq(real_dsq_id, -1);
+	if (err) {
+		scx_bpf_error("Failed to create DSQ %d: %d", real_dsq_id, err);
+		return err;
+	}
+
+	/* Create the DSQ pool for stress testing */
+	bpf_for(i, 0, DSQ_POOL_SIZE) {
+		int dsq_id = dsq_pool_base_id + i;
+
+		err = scx_bpf_create_dsq(dsq_id, -1);
+		if (err) {
+			scx_bpf_error("Failed to create DSQ pool entry %d: %d", dsq_id, err);
+			return err;
+		}
+	}
+
+	/* Initialize the peek results map: -1 marks a slot as empty */
+	bpf_for(i, 0, MAX_SAMPLES) {
+		u32 key = i;
+		long pid = -1;
+
+		bpf_map_update_elem(&peek_results, &key, &pid, BPF_ANY);
+	}
+
+	return 0;
+}
+
+void BPF_STRUCT_OPS(peek_dsq_exit, struct scx_exit_info *ei)
+{
+	int i;
+
+	/* Destroy the primary DSQs */
+	scx_bpf_destroy_dsq(test_dsq_id);
+	scx_bpf_destroy_dsq(real_dsq_id);
+
+	/* Destroy the DSQ pool (ids dsq_pool_base_id .. +DSQ_POOL_SIZE-1) */
+	bpf_for(i, 0, DSQ_POOL_SIZE) {
+		int dsq_id = dsq_pool_base_id + i;
+
+		scx_bpf_destroy_dsq(dsq_id);
+	}
+
+	UEI_RECORD(uei, ei); /* presumably saves the exit info for userspace - confirm in the UEI macros */
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops peek_dsq_ops = {
+	.enqueue = (void *)peek_dsq_enqueue,
+	.dispatch = (void *)peek_dsq_dispatch,
+	.init = (void *)peek_dsq_init,
+	.exit = (void *)peek_dsq_exit,
+	.name = "peek_dsq",
+};
diff --git a/tools/testing/selftests/sched_ext/peek_dsq.c b/tools/testing/selftests/sched_ext/peek_dsq.c
new file mode 100644
index 000000000000..a717384a3224
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/peek_dsq.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for DSQ operations including create, destroy, and peek operations.
+ *
+ * Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2025 Ryan Newton <ryan.newton@alum.mit.edu>
+ */
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <string.h>
+#include <sched.h>
+#include "peek_dsq.bpf.skel.h"
+#include "scx_test.h"
+
+#define NUM_WORKERS 4
+
+static bool workload_running;	/* false until run() spawns workers; cleanup() joins when true */
+static pthread_t workload_threads[NUM_WORKERS];
+
+/*
+ * Worker thread body: sleep 1ms and yield in a loop until workload_running is
+ * cleared, generating wakeups for the scheduler under test. @arg is unused.
+ */
+static void *workload_thread_fn(void *arg)
+{
+	while (workload_running) {
+		/* Sleep for a very short time to trigger scheduler activity */
+		usleep(1000); /* 1ms sleep */
+		/* Yield to ensure we go through the scheduler */
+		sched_yield();
+	}
+	return NULL;
+}
+
+static enum scx_test_status setup(void **ctx)
+{
+	struct peek_dsq *skel;
+
+	skel = peek_dsq__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(peek_dsq__load(skel), "Failed to load skel");
+
+	*ctx = skel;
+
+	return SCX_TEST_PASS;
+}
+
+static int print_observed_pids(struct bpf_map *map, int max_samples, const char *dsq_name)
+{
+	long count = 0;	/* samples that hold a positive pid */
+
+	printf("Observed %s DSQ peek pids:\n", dsq_name);
+	for (int i = 0; i < max_samples; i++) {	/* i doubles as the map key */
+		long pid;
+		int err;
+
+		err = bpf_map_lookup_elem(bpf_map__fd(map), &i, &pid);
+		if (err == 0) {
+			if (pid == 0) {
+				printf("  Sample %d: NULL peek\n", i);
+			} else if (pid > 0) {
+				printf("  Sample %d: pid %ld\n", i, pid);
+				count++;
+			}	/* pid < 0 (the -1 seed from BPF init): print nothing */
+		} else {
+			printf("  Sample %d: error reading pid (err=%d)\n", i, err);
+		}
+	}
+	printf("Observed ~%ld pids in the %s DSQ(s)\n", count, dsq_name);
+	return count;	/* long narrowed to the int return type */
+}
+
+static enum scx_test_status run(void *ctx)
+{
+	struct peek_dsq *skel = ctx;
+	bool failed = false;
+	int seconds = 3;
+	int err;
+
+	/* Enable the scheduler to test DSQ operations */
+	printf("Enabling scheduler to test DSQ insert operations...\n");
+
+	struct bpf_link *link =
+		bpf_map__attach_struct_ops(skel->maps.peek_dsq_ops);
+
+	if (!link) {
+		SCX_ERR("Failed to attach struct_ops");
+		return SCX_TEST_FAIL;
+	}
+
+	printf("Starting %d background workload threads...\n", NUM_WORKERS);
+	workload_running = true;
+	for (int i = 0; i < NUM_WORKERS; i++) {
+		err = pthread_create(&workload_threads[i], NULL, workload_thread_fn, NULL);
+		if (err) {
+			SCX_ERR("Failed to create workload thread %d: %s", i, strerror(err));
+			/* Stop already created threads */
+			workload_running = false;
+			for (int j = 0; j < i; j++)
+				pthread_join(workload_threads[j], NULL);
+			bpf_link__destroy(link);
+			return SCX_TEST_FAIL;
+		}
+	}
+
+	printf("Waiting for enqueue events.\n");
+	sleep(seconds);
+	while (skel->data->enqueue_count <= 0) {
+		printf(".");
+		fflush(stdout);
+		sleep(1);
+		seconds++;
+		if (seconds >= 30) {
+			printf("\n\u2717 Timeout waiting for enqueue events\n");
+			/* Stop workload threads and cleanup */
+			workload_running = false;
+			for (int i = 0; i < NUM_WORKERS; i++)
+				pthread_join(workload_threads[i], NULL);
+			bpf_link__destroy(link);
+			return SCX_TEST_FAIL;
+		}
+	}
+
+	workload_running = false;
+	for (int i = 0; i < NUM_WORKERS; i++) {
+		err = pthread_join(workload_threads[i], NULL);
+		if (err) {
+			SCX_ERR("Failed to join workload thread %d: %s", i, strerror(err));
+			bpf_link__destroy(link);
+			return SCX_TEST_FAIL;
+		}
+	}
+	printf("Background workload threads stopped.\n");
+
+	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE));
+
+	/* Detach the scheduler */
+	bpf_link__destroy(link);
+
+	printf("Enqueue/dispatch count over %d seconds: %d / %d\n", seconds,
+		skel->data->enqueue_count, skel->data->dispatch_count);
+	printf("Debug: ksym_exists=%d\n",
+	       skel->bss->debug_ksym_exists);
+
+	/* Check DSQ insert result */
+	printf("DSQ insert test done on cpu: %d\n", skel->data->insert_test_cpu);
+	if (skel->data->insert_test_cpu != -1)
+		printf("\u2713 DSQ insert succeeded !\n");
+	else {
+		printf("\u2717 DSQ insert failed or not attempted\n");
+		failed = true;
+	}
+
+	/* Check DSQ peek results */
+	printf("  DSQ peek result 1 (before insert): %d\n",
+	       skel->data->dsq_peek_result1);
+	if (skel->data->dsq_peek_result1 == 0)
+		printf("\u2713 DSQ peek verification success: peek returned NULL!\n");
+	else {
+		printf("\u2717 DSQ peek verification failed\n");
+		failed = true;
+	}
+
+	printf("  DSQ peek result 2 (after insert): %ld\n",
+	       skel->data->dsq_peek_result2);
+	printf("  DSQ peek result 2, expected: %ld\n",
+	       skel->data->dsq_peek_result2_expected);
+	if (skel->data->dsq_peek_result2 ==
+	    skel->data->dsq_peek_result2_expected)
+		printf("\u2713 DSQ peek verification success: peek returned the inserted task!\n");
+	else {
+		printf("\u2717 DSQ peek verification failed\n");
+		failed = true;
+	}
+
+	printf("  Inserted test task -> pid: %ld\n", skel->data->dsq_inserted_pid);
+	printf("  DSQ peek result 2 -> pid: %ld\n", skel->data->dsq_peek_result2_pid);
+
+	int pid_count;
+
+	pid_count = print_observed_pids(skel->maps.peek_results,
+					skel->data->max_samples, "DSQ pool");
+	printf("Total non-null peek observations: %ld out of %ld\n",
+	       skel->data->successful_peeks, skel->data->total_peek_attempts);
+
+	if (skel->bss->debug_ksym_exists && pid_count == 0) {
+		printf("\u2717 DSQ pool test failed: no successful peeks in native mode\n");
+		failed = true;
+	}
+	if (skel->bss->debug_ksym_exists && pid_count > 0)
+		printf("\u2713 DSQ pool test success: observed successful peeks in native mode\n");
+
+	if (failed)
+		return SCX_TEST_FAIL;
+	else
+		return SCX_TEST_PASS;
+}
+
+static void cleanup(void *ctx)
+{
+	struct peek_dsq *skel = ctx;
+
+	if (workload_running) {	/* flag still set: clear it and join all NUM_WORKERS threads */
+		workload_running = false;
+		for (int i = 0; i < NUM_WORKERS; i++)
+			pthread_join(workload_threads[i], NULL);
+	}
+
+	peek_dsq__destroy(skel);	/* releases the skeleton stored by setup() */
+}
+
+struct scx_test peek_dsq = {
+	.name = "peek_dsq",
+	.description =
+		"Test DSQ create/destroy operations and future peek functionality",
+	.setup = setup,
+	.run = run,
+	.cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&peek_dsq)
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 998e5a2f4579..47de27fd4f90 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -961,5 +961,77 @@
         "teardown": [
             "$TC qdisc del dev $DUMMY root"
         ]
+    },
+    {
+        "id": "4989",
+        "name": "Try to add an fq child to an ingress qdisc",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DUMMY handle ffff:0 ingress"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq",
+        "expExitCode": "2",
+        "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:",
+        "matchJSON": [],
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY ingress"
+        ]
+    },
+    {
+        "id": "c2b0",
+        "name": "Try to add an fq child to a clsact qdisc",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DUMMY handle ffff:0 clsact"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq",
+        "expExitCode": "2",
+        "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:",
+        "matchJSON": [],
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY clsact"
+        ]
+    },
+    {
+        "id": "4366",
+        "name": "CAKE with QFQ Parent - CAKE enqueue with packets dropping",
+        "category": [
+            "qdisc",
+            "cake",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup":[
+            "$TC qdisc add dev $DUMMY handle 1: root qfq",
+            "$TC class add dev $DUMMY parent 1: classid 1:1 qfq maxpkt 1024",
+            "$TC qdisc add dev $DUMMY parent 1:1 handle 2: cake memlimit 9",
+            "$TC filter add dev $DUMMY protocol ip parent 1: prio 1 u32 match ip protocol 1 0xff flowid 1:1",
+            "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true",
+            "$TC qdisc replace dev $DUMMY parent 1:1 handle 3: netem delay 0ms"
+        ],
+        "cmdUnderTest": "ping -I$DUMMY -f -c1 -s64 -W1 10.10.10.1 || true",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+        "matchPattern": "qdisc qfq 1:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
index 252c6308c569..10badae13ebe 100644
--- a/tools/testing/selftests/timers/nanosleep.c
+++ b/tools/testing/selftests/timers/nanosleep.c
@@ -116,6 +116,56 @@ int nanosleep_test(int clockid, long long ns)
 	return 0;
 }
 
+static void dummy_event_handler(int val)
+{
+	/* No action needed */
+}
+
+static int nanosleep_test_remaining(int clockid)
+{
+	struct timespec rqtp = {}, rmtp = {};
+	struct itimerspec itimer = {};
+	struct sigaction sa = {};
+	timer_t timer;
+	int ret;
+
+	sa.sa_handler = dummy_event_handler;	/* sa_flags stays 0, so no SA_RESTART */
+	ret = sigaction(SIGALRM, &sa, NULL);
+	if (ret)
+		return -1;
+
+	ret = timer_create(clockid, NULL, &timer);	/* NULL sigevent: POSIX default, SIGALRM */
+	if (ret)
+		return -1;
+
+	itimer.it_value.tv_nsec = NSEC_PER_SEC / 4;	/* one-shot: it_interval stays zero */
+	ret = timer_settime(timer, 0, &itimer, NULL);
+	if (ret)
+		return -1;
+
+	rqtp.tv_nsec = NSEC_PER_SEC / 2;	/* longer than the timer, so the signal lands mid-sleep */
+	ret = clock_nanosleep(clockid, 0, &rqtp, &rmtp);	/* flags 0: relative sleep */
+	if (ret != EINTR)	/* anything but an interrupted sleep fails the test */
+		return -1;
+
+	ret = timer_delete(timer);
+	if (ret)
+		return -1;
+
+	sa.sa_handler = SIG_DFL;	/* restore default SIGALRM handling */
+	ret = sigaction(SIGALRM, &sa, NULL);
+	if (ret)
+		return -1;
+
+	if (!in_order((struct timespec) {}, rmtp))	/* presumably rejects a negative remainder */
+		return -1;
+
+	if (!in_order(rmtp, rqtp))	/* presumably rejects remainder > request; in_order() not shown */
+		return -1;
+
+	return 0;
+}
+
 int main(int argc, char **argv)
 {
 	long long length;
@@ -150,6 +200,11 @@ int main(int argc, char **argv)
 			}
 			length *= 100;
 		}
+		ret = nanosleep_test_remaining(clockid);
+		if (ret < 0) {
+			ksft_test_result_fail("%-31s\n", clockstring(clockid));
+			ksft_exit_fail();
+		}
 		ksft_test_result_pass("%-31s\n", clockstring(clockid));
 next:
 		ret = 0;
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index f0eceb0faf34..a563c438ac79 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -18,6 +18,7 @@
 #include <time.h>
 #include <include/vdso/time64.h>
 #include <pthread.h>
+#include <stdbool.h>
 
 #include "../kselftest.h"
 
@@ -670,8 +671,14 @@ static void check_timer_create_exact(void)
 
 int main(int argc, char **argv)
 {
+	bool run_sig_ign_tests = ksft_min_kernel_version(6, 13);
+
 	ksft_print_header();
-	ksft_set_plan(19);
+	if (run_sig_ign_tests) {
+		ksft_set_plan(19);
+	} else {
+		ksft_set_plan(10);
+	}
 
 	ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n");
 	ksft_print_msg("based timers if other threads run on the CPU...\n");
@@ -695,15 +702,20 @@ int main(int argc, char **argv)
 	check_timer_create(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
 	check_timer_distribution();
 
-	check_sig_ign(0);
-	check_sig_ign(1);
-	check_rearm();
-	check_delete();
-	check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
-	check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
-	check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
-	check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
-	check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
+	if (run_sig_ign_tests) {
+		check_sig_ign(0);
+		check_sig_ign(1);
+		check_rearm();
+		check_delete();
+		check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+		check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+		check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+		check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+		check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
+	} else {
+		ksft_print_msg("Skipping SIG_IGN tests on kernel < 6.13\n");
+	}
+
 	check_overrun(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
 	check_overrun(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
 	check_overrun(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 6b8123c12a7a..f8fa102a627f 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -836,56 +836,70 @@ static int ublk_process_io(struct ublk_thread *t)
 	return reapped;
 }
 
-static void ublk_thread_set_sched_affinity(const struct ublk_thread *t,
-		cpu_set_t *cpuset)
-{
-        if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
-		ublk_err("ublk dev %u thread %u set affinity failed",
-				t->dev->dev_info.dev_id, t->idx);
-}
-
 struct ublk_thread_info {
 	struct ublk_dev 	*dev;
+	pthread_t		thread;
 	unsigned		idx;
 	sem_t 			*ready;
 	cpu_set_t 		*affinity;
 	unsigned long long	extra_flags;
 };
 
-static void *ublk_io_handler_fn(void *data)
+static void ublk_thread_set_sched_affinity(const struct ublk_thread_info *info)
 {
-	struct ublk_thread_info *info = data;
-	struct ublk_thread *t = &info->dev->threads[info->idx];
+	if (pthread_setaffinity_np(pthread_self(), sizeof(*info->affinity), info->affinity) != 0)
+		ublk_err("ublk dev %u thread %u set affinity failed",
+				info->dev->dev_info.dev_id, info->idx);
+}
+
+static __attribute__((noinline)) int __ublk_io_handler_fn(struct ublk_thread_info *info)
+{
+	struct ublk_thread t = {
+		.dev = info->dev,
+		.idx = info->idx,
+	};
 	int dev_id = info->dev->dev_info.dev_id;
 	int ret;
 
-	t->dev = info->dev;
-	t->idx = info->idx;
-
-	ret = ublk_thread_init(t, info->extra_flags);
+	ret = ublk_thread_init(&t, info->extra_flags);
 	if (ret) {
 		ublk_err("ublk dev %d thread %u init failed\n",
-				dev_id, t->idx);
-		return NULL;
+				dev_id, t.idx);
+		return ret;
 	}
-	/* IO perf is sensitive with queue pthread affinity on NUMA machine*/
-	if (info->affinity)
-		ublk_thread_set_sched_affinity(t, info->affinity);
 	sem_post(info->ready);
 
 	ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n",
-			gettid(), dev_id, t->idx);
+			gettid(), dev_id, t.idx);
 
 	/* submit all io commands to ublk driver */
-	ublk_submit_fetch_commands(t);
+	ublk_submit_fetch_commands(&t);
 	do {
-		if (ublk_process_io(t) < 0)
+		if (ublk_process_io(&t) < 0)
 			break;
 	} while (1);
 
 	ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %d exiting\n",
-		 gettid(), dev_id, t->idx);
-	ublk_thread_deinit(t);
+		 gettid(), dev_id, t.idx);
+	ublk_thread_deinit(&t);
+	return 0;
+}
+
+static void *ublk_io_handler_fn(void *data)
+{
+	struct ublk_thread_info *info = data;
+
+	/*
+	 * IO perf is sensitive with queue pthread affinity on NUMA machine
+	 *
+	 * Set sched_affinity at beginning, so following allocated memory/pages
+	 * could be CPU/NUMA aware.
+	 */
+	if (info->affinity)
+		ublk_thread_set_sched_affinity(info);
+
+	__ublk_io_handler_fn(info);
+
 	return NULL;
 }
 
@@ -983,14 +997,13 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
 		 */
 		if (dev->nthreads == dinfo->nr_hw_queues)
 			tinfo[i].affinity = &affinity_buf[i];
-		pthread_create(&dev->threads[i].thread, NULL,
+		pthread_create(&tinfo[i].thread, NULL,
 				ublk_io_handler_fn,
 				&tinfo[i]);
 	}
 
 	for (i = 0; i < dev->nthreads; i++)
 		sem_wait(&ready);
-	free(tinfo);
 	free(affinity_buf);
 
 	/* everything is fine now, start us */
@@ -1013,7 +1026,8 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
 
 	/* wait until we are terminated */
 	for (i = 0; i < dev->nthreads; i++)
-		pthread_join(dev->threads[i].thread, &thread_ret);
+		pthread_join(tinfo[i].thread, &thread_ret);
+	free(tinfo);
  fail:
 	for (i = 0; i < dinfo->nr_hw_queues; i++)
 		ublk_queue_deinit(&dev->q[i]);
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 5e55484fb0aa..fe42705c6d42 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -175,23 +175,20 @@ struct ublk_queue {
 
 struct ublk_thread {
 	struct ublk_dev *dev;
-	struct io_uring ring;
-	unsigned int cmd_inflight;
-	unsigned int io_inflight;
-
-	pthread_t thread;
 	unsigned idx;
 
 #define UBLKS_T_STOPPING	(1U << 0)
 #define UBLKS_T_IDLE	(1U << 1)
 	unsigned state;
+	unsigned int cmd_inflight;
+	unsigned int io_inflight;
+	struct io_uring ring;
 };
 
 struct ublk_dev {
 	struct ublk_tgt tgt;
 	struct ublksrv_ctrl_dev_info  dev_info;
 	struct ublk_queue q[UBLK_MAX_QUEUES];
-	struct ublk_thread threads[UBLK_MAX_THREADS];
 	unsigned nthreads;
 	unsigned per_io_tasks;
 
diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c
index 5288e768b207..68625362add2 100644
--- a/tools/testing/selftests/user_events/perf_test.c
+++ b/tools/testing/selftests/user_events/perf_test.c
@@ -236,7 +236,7 @@ TEST_F(user, perf_empty_events) {
 	ASSERT_EQ(1 << reg.enable_bit, self->check);
 
 	/* Ensure write shows up at correct offset */
-	ASSERT_NE(-1, write(self->data_fd, &reg.write_index,
+	ASSERT_NE(-1, write(self->data_fd, (void *)&reg.write_index,
 					sizeof(reg.write_index)));
 	val = (void *)(((char *)perf_page) + perf_page->data_offset);
 	ASSERT_EQ(PERF_RECORD_SAMPLE, *val);
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
index 5fdd0f362337..50c261005111 100644
--- a/tools/testing/selftests/vDSO/vdso_config.h
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -25,10 +25,6 @@
 #define VDSO_VERSION		1
 #define VDSO_NAMES		0
 #define VDSO_32BIT		1
-#elif defined (__s390__) && !defined(__s390x__)
-#define VDSO_VERSION		2
-#define VDSO_NAMES		0
-#define VDSO_32BIT		1
 #elif defined (__s390x__)
 #define VDSO_VERSION		2
 #define VDSO_NAMES		0
diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
index ed31606e01b7..69ec0c856481 100644
--- a/tools/testing/selftests/vfio/lib/include/vfio_util.h
+++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
@@ -4,9 +4,12 @@
 
 #include <fcntl.h>
 #include <string.h>
-#include <linux/vfio.h>
+
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
 #include <linux/list.h>
 #include <linux/pci_regs.h>
+#include <linux/vfio.h>
 
 #include "../../../kselftest.h"
 
@@ -185,6 +188,13 @@ struct vfio_pci_device {
 	struct vfio_pci_driver driver;
 };
 
+struct iova_allocator {
+	struct iommu_iova_range *ranges;	/* sorted ranges; freed by iova_allocator_cleanup() */
+	u32 nranges;				/* number of entries in ranges */
+	u32 range_idx;				/* range the next allocation is tried from */
+	u64 range_offset;			/* offset into that range where the search resumes */
+};
+
 /*
  * Return the BDF string of the device that the test should use.
  *
@@ -206,10 +216,36 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_
 void vfio_pci_device_cleanup(struct vfio_pci_device *device);
 void vfio_pci_device_reset(struct vfio_pci_device *device);
 
-void vfio_pci_dma_map(struct vfio_pci_device *device,
-		      struct vfio_dma_region *region);
-void vfio_pci_dma_unmap(struct vfio_pci_device *device,
-			struct vfio_dma_region *region);
+struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
+					      u32 *nranges);
+
+struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device);
+void iova_allocator_cleanup(struct iova_allocator *allocator);
+iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size);
+
+int __vfio_pci_dma_map(struct vfio_pci_device *device,
+		       struct vfio_dma_region *region);
+int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
+			 struct vfio_dma_region *region,
+			 u64 *unmapped);
+int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped);
+
+static inline void vfio_pci_dma_map(struct vfio_pci_device *device,
+				    struct vfio_dma_region *region)
+{
+	VFIO_ASSERT_EQ(__vfio_pci_dma_map(device, region), 0);
+}
+
+static inline void vfio_pci_dma_unmap(struct vfio_pci_device *device,
+				      struct vfio_dma_region *region)
+{
+	VFIO_ASSERT_EQ(__vfio_pci_dma_unmap(device, region, NULL), 0);
+}
+
+static inline void vfio_pci_dma_unmap_all(struct vfio_pci_device *device)
+{
+	VFIO_ASSERT_EQ(__vfio_pci_dma_unmap_all(device, NULL), 0);
+}
 
 void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
 			    size_t config, size_t size, void *data);
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index 0921b2451ba5..b479a359da12 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -2,6 +2,7 @@
 #include <dirent.h>
 #include <fcntl.h>
 #include <libgen.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
@@ -11,11 +12,12 @@
 #include <sys/mman.h>
 
 #include <uapi/linux/types.h>
+#include <linux/iommufd.h>
 #include <linux/limits.h>
 #include <linux/mman.h>
+#include <linux/overflow.h>
 #include <linux/types.h>
 #include <linux/vfio.h>
-#include <linux/iommufd.h>
 
 #include "../../../kselftest.h"
 #include <vfio_util.h>
@@ -28,6 +30,249 @@
 	VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
 } while (0)
 
+static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz,
+						 u32 *cap_offset)
+{
+	struct vfio_info_cap_header *hdr;
+
+	if (!*cap_offset)
+		return NULL;
+
+	VFIO_ASSERT_LT(*cap_offset, bufsz);
+	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
+
+	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
+	*cap_offset = hdr->next;
+
+	return hdr;
+}
+
+static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
+							    u16 cap_id)
+{
+	struct vfio_info_cap_header *hdr;
+	u32 cap_offset = info->cap_offset;
+	u32 max_depth;
+	u32 depth = 0;
+
+	if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
+		return NULL;
+
+	if (cap_offset)
+		VFIO_ASSERT_GE(cap_offset, sizeof(*info));
+
+	max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr);
+
+	while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
+		depth++;
+		VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n");
+
+		if (hdr->id == cap_id)
+			return hdr;
+	}
+
+	return NULL;
+}
+
+/* Return buffer including capability chain, if present. Free with free() */
+static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device)
+{
+	struct vfio_iommu_type1_info *info;
+
+	info = malloc(sizeof(*info));
+	VFIO_ASSERT_NOT_NULL(info);
+
+	*info = (struct vfio_iommu_type1_info) {
+		.argsz = sizeof(*info),	/* first query uses a header-only buffer */
+	};
+
+	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);
+	VFIO_ASSERT_GE(info->argsz, sizeof(*info));	/* argsz is used as the needed size */
+
+	info = realloc(info, info->argsz);	/* grow to the size read back in argsz */
+	VFIO_ASSERT_NOT_NULL(info);
+
+	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);	/* query again */
+	VFIO_ASSERT_GE(info->argsz, sizeof(*info));
+
+	return info;
+}
+
+/*
+ * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
+ * report iommufd's iommu_iova_range. Free with free().
+ */
+static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
+						       u32 *nranges)
+{
+	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
+	struct vfio_iommu_type1_info *info;
+	struct vfio_info_cap_header *hdr;
+	struct iommu_iova_range *ranges = NULL;
+
+	info = vfio_iommu_get_info(device);
+	hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+	VFIO_ASSERT_NOT_NULL(hdr);
+
+	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
+	VFIO_ASSERT_GT(cap_range->nr_iovas, 0);
+
+	ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
+	VFIO_ASSERT_NOT_NULL(ranges);
+
+	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
+		ranges[i] = (struct iommu_iova_range){
+			.start = cap_range->iova_ranges[i].start,
+			.last = cap_range->iova_ranges[i].end,
+		};
+	}
+
+	*nranges = cap_range->nr_iovas;
+
+	free(info);
+	return ranges;
+}
+
+/* Return the IOVA ranges of the device's IOAS; count goes to *nranges. Free with free() */
+static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
+						    u32 *nranges)
+{
+	struct iommu_iova_range *ranges;
+	int ret;
+
+	struct iommu_ioas_iova_ranges query = {
+		.size = sizeof(query),
+		.ioas_id = device->ioas_id,
+	};
+
+	ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);	/* probe: no buffer yet */
+	VFIO_ASSERT_EQ(ret, -1);
+	VFIO_ASSERT_EQ(errno, EMSGSIZE);	/* the probe is expected to fail this way */
+	VFIO_ASSERT_GT(query.num_iovas, 0);	/* now used as the number of ranges to allocate */
+
+	ranges = calloc(query.num_iovas, sizeof(*ranges));
+	VFIO_ASSERT_NOT_NULL(ranges);
+
+	query.allowed_iovas = (uintptr_t)ranges;	/* output buffer for the second call */
+
+	ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+	*nranges = query.num_iovas;
+
+	return ranges;
+}
+
+static int iova_range_comp(const void *a, const void *b)
+{
+	const struct iommu_iova_range *lhs = a;
+	const struct iommu_iova_range *rhs = b;
+
+	/*
+	 * qsort() comparator: orders ranges by ascending start address.
+	 * Uses comparisons rather than a subtraction because a u64
+	 * difference does not fit in the int return value.
+	 */
+	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
+}
+
+/* Return sorted IOVA ranges of the device. Free with free(). */
+struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
+					      u32 *nranges)
+{
+	struct iommu_iova_range *ranges;
+
+	if (device->iommufd)
+		ranges = iommufd_iova_ranges(device, nranges);
+	else
+		ranges = vfio_iommu_iova_ranges(device, nranges);
+
+	if (!ranges)
+		return NULL;
+
+	VFIO_ASSERT_GT(*nranges, 0);
+
+	/* Sort and check that ranges are sane and non-overlapping */
+	qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
+	VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);
+
+	for (u32 i = 1; i < *nranges; i++) {
+		VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
+		VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
+	}
+
+	return ranges;
+}
+
+struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device)
+{
+	struct iova_allocator *allocator;
+	struct iommu_iova_range *ranges;
+	u32 nranges;
+
+	ranges = vfio_pci_iova_ranges(device, &nranges);
+	VFIO_ASSERT_NOT_NULL(ranges);
+
+	allocator = malloc(sizeof(*allocator));
+	VFIO_ASSERT_NOT_NULL(allocator);
+
+	*allocator = (struct iova_allocator){
+		.ranges = ranges,
+		.nranges = nranges,
+		.range_idx = 0,
+		.range_offset = 0,
+	};
+
+	return allocator;
+}
+
+void iova_allocator_cleanup(struct iova_allocator *allocator)
+{
+	free(allocator->ranges);
+	free(allocator);
+}
+
+iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size)
+{
+	VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n");
+	VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n");
+
+	for (;;) {	/* walk the ranges in order; asserts once all are used up */
+		struct iommu_iova_range *range;
+		iova_t iova, last;
+
+		VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges,
+			       "IOVA allocator out of space\n");
+
+		range = &allocator->ranges[allocator->range_idx];
+		iova = range->start + allocator->range_offset;
+
+		/* Check for sufficient space at the current offset */
+		if (check_add_overflow(iova, size - 1, &last) ||
+		    last > range->last)
+			goto next_range;
+
+		/* Round iova up to a multiple of size (last is iova + size - 1, size a power of 2) */
+		iova = last & ~(size - 1);
+
+		/* Check for sufficient space at the aligned iova */
+		if (check_add_overflow(iova, size - 1, &last) ||
+		    last > range->last)
+			goto next_range;
+
+		if (last == range->last) {	/* allocation ends the range: move to the next one */
+			allocator->range_idx++;
+			allocator->range_offset = 0;
+		} else {	/* next allocation starts right after this one */
+			allocator->range_offset = last - range->start + 1;
+		}
+
+		return iova;
+
+next_range:	/* request does not fit; the rest of this range is never revisited */
+		allocator->range_idx++;
+		allocator->range_offset = 0;
+	}
+}
+
 iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
 {
 	struct vfio_dma_region *region;
@@ -141,7 +386,7 @@ static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
 	ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
 }
 
-static void vfio_iommu_dma_map(struct vfio_pci_device *device,
+static int vfio_iommu_dma_map(struct vfio_pci_device *device,
 			       struct vfio_dma_region *region)
 {
 	struct vfio_iommu_type1_dma_map args = {
@@ -152,10 +397,13 @@ static void vfio_iommu_dma_map(struct vfio_pci_device *device,
 		.size = region->size,
 	};
 
-	ioctl_assert(device->container_fd, VFIO_IOMMU_MAP_DMA, &args);
+	if (ioctl(device->container_fd, VFIO_IOMMU_MAP_DMA, &args))
+		return -errno;
+
+	return 0;
 }
 
-static void iommufd_dma_map(struct vfio_pci_device *device,
+static int iommufd_dma_map(struct vfio_pci_device *device,
 			    struct vfio_dma_region *region)
 {
 	struct iommu_ioas_map args = {
@@ -169,54 +417,108 @@ static void iommufd_dma_map(struct vfio_pci_device *device,
 		.ioas_id = device->ioas_id,
 	};
 
-	ioctl_assert(device->iommufd, IOMMU_IOAS_MAP, &args);
+	if (ioctl(device->iommufd, IOMMU_IOAS_MAP, &args))
+		return -errno;
+
+	return 0;
 }
 
-void vfio_pci_dma_map(struct vfio_pci_device *device,
+int __vfio_pci_dma_map(struct vfio_pci_device *device,
 		      struct vfio_dma_region *region)
 {
+	int ret;
+
 	if (device->iommufd)
-		iommufd_dma_map(device, region);
+		ret = iommufd_dma_map(device, region);
 	else
-		vfio_iommu_dma_map(device, region);
+		ret = vfio_iommu_dma_map(device, region);
+
+	if (ret)
+		return ret;
 
 	list_add(&region->link, &device->dma_regions);
+
+	return 0;
 }
 
-static void vfio_iommu_dma_unmap(struct vfio_pci_device *device,
-				 struct vfio_dma_region *region)
+static int vfio_iommu_dma_unmap(int fd, u64 iova, u64 size, u32 flags,
+				u64 *unmapped)
 {
 	struct vfio_iommu_type1_dma_unmap args = {
 		.argsz = sizeof(args),
-		.iova = region->iova,
-		.size = region->size,
+		.iova = iova,
+		.size = size,
+		.flags = flags,
 	};
 
-	ioctl_assert(device->container_fd, VFIO_IOMMU_UNMAP_DMA, &args);
+	if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args))
+		return -errno;
+
+	if (unmapped)
+		*unmapped = args.size;
+
+	return 0;
 }
 
-static void iommufd_dma_unmap(struct vfio_pci_device *device,
-			      struct vfio_dma_region *region)
+static int iommufd_dma_unmap(int fd, u64 iova, u64 length, u32 ioas_id,
+			     u64 *unmapped)
 {
 	struct iommu_ioas_unmap args = {
 		.size = sizeof(args),
-		.iova = region->iova,
-		.length = region->size,
-		.ioas_id = device->ioas_id,
+		.iova = iova,
+		.length = length,
+		.ioas_id = ioas_id,
 	};
 
-	ioctl_assert(device->iommufd, IOMMU_IOAS_UNMAP, &args);
+	if (ioctl(fd, IOMMU_IOAS_UNMAP, &args))
+		return -errno;
+
+	if (unmapped)
+		*unmapped = args.length;
+
+	return 0;
 }
 
-void vfio_pci_dma_unmap(struct vfio_pci_device *device,
-			struct vfio_dma_region *region)
+int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
+			 struct vfio_dma_region *region, u64 *unmapped)
 {
+	int ret;
+
 	if (device->iommufd)
-		iommufd_dma_unmap(device, region);
+		ret = iommufd_dma_unmap(device->iommufd, region->iova,
+					region->size, device->ioas_id,
+					unmapped);
 	else
-		vfio_iommu_dma_unmap(device, region);
+		ret = vfio_iommu_dma_unmap(device->container_fd, region->iova,
+					   region->size, 0, unmapped);
+
+	if (ret)
+		return ret;
+
+	list_del_init(&region->link);
+
+	return 0;
+}
+
+int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped)
+{
+	int ret;
+	struct vfio_dma_region *curr, *next;
+
+	if (device->iommufd)
+		ret = iommufd_dma_unmap(device->iommufd, 0, UINT64_MAX,
+					device->ioas_id, unmapped);
+	else
+		ret = vfio_iommu_dma_unmap(device->container_fd, 0, 0,
+					   VFIO_DMA_UNMAP_FLAG_ALL, unmapped);
+
+	if (ret)
+		return ret;
+
+	list_for_each_entry_safe(curr, next, &device->dma_regions, link)
+		list_del_init(&curr->link);
 
-	list_del(&region->link);
+	return 0;
 }
 
 static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
index ab19c54a774d..102603d4407d 100644
--- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -3,6 +3,8 @@
 #include <sys/mman.h>
 #include <unistd.h>
 
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
 #include <linux/limits.h>
 #include <linux/mman.h>
 #include <linux/sizes.h>
@@ -93,6 +95,7 @@ static int iommu_mapping_get(const char *bdf, u64 iova,
 
 FIXTURE(vfio_dma_mapping_test) {
 	struct vfio_pci_device *device;
+	struct iova_allocator *iova_allocator;
 };
 
 FIXTURE_VARIANT(vfio_dma_mapping_test) {
@@ -112,13 +115,17 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous, 0, 0);
 FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_2mb, SZ_2M, MAP_HUGETLB | MAP_HUGE_2MB);
 FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | MAP_HUGE_1GB);
 
+#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
+
 FIXTURE_SETUP(vfio_dma_mapping_test)
 {
 	self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
+	self->iova_allocator = iova_allocator_init(self->device);
 }
 
 FIXTURE_TEARDOWN(vfio_dma_mapping_test)
 {
+	iova_allocator_cleanup(self->iova_allocator);
 	vfio_pci_device_cleanup(self->device);
 }
 
@@ -129,6 +136,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
 	struct vfio_dma_region region;
 	struct iommu_mapping mapping;
 	u64 mapping_size = size;
+	u64 unmapped;
 	int rc;
 
 	region.vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
@@ -139,7 +147,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
 	else
 		ASSERT_NE(region.vaddr, MAP_FAILED);
 
-	region.iova = (u64)region.vaddr;
+	region.iova = iova_allocator_alloc(self->iova_allocator, size);
 	region.size = size;
 
 	vfio_pci_dma_map(self->device, &region);
@@ -184,7 +192,9 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap)
 	}
 
 unmap:
-	vfio_pci_dma_unmap(self->device, &region);
+	rc = __vfio_pci_dma_unmap(self->device, &region, &unmapped);
+	ASSERT_EQ(rc, 0);
+	ASSERT_EQ(unmapped, region.size);
 	printf("Unmapped IOVA 0x%lx\n", region.iova);
 	ASSERT_EQ(INVALID_IOVA, __to_iova(self->device, region.vaddr));
 	ASSERT_NE(0, iommu_mapping_get(device_bdf, region.iova, &mapping));
@@ -192,6 +202,103 @@ unmap:
 	ASSERT_TRUE(!munmap(region.vaddr, size));
 }
 
+FIXTURE(vfio_dma_map_limit_test) {
+	struct vfio_pci_device *device;
+	struct vfio_dma_region region;
+	size_t mmap_size;
+};
+
+FIXTURE_VARIANT(vfio_dma_map_limit_test) {
+	const char *iommu_mode;
+};
+
+#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode)			       \
+FIXTURE_VARIANT_ADD(vfio_dma_map_limit_test, _iommu_mode) {		       \
+	.iommu_mode = #_iommu_mode,					       \
+}
+
+FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
+
+#undef FIXTURE_VARIANT_ADD_IOMMU_MODE
+
+FIXTURE_SETUP(vfio_dma_map_limit_test)
+{
+	struct vfio_dma_region *region = &self->region;
+	struct iommu_iova_range *ranges;
+	u64 region_size = getpagesize();
+	iova_t last_iova;
+	u32 nranges;
+
+	/*
+	 * Back the region with twice its size: the overflow test grows
+	 * region->size to mmap_size and needs vaddr behind every byte of it.
+	 */
+	self->mmap_size = 2 * region_size;
+
+	self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
+	region->vaddr = mmap(NULL, self->mmap_size, PROT_READ | PROT_WRITE,
+			     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	ASSERT_NE(region->vaddr, MAP_FAILED);
+
+	ranges = vfio_pci_iova_ranges(self->device, &nranges);
+	VFIO_ASSERT_NOT_NULL(ranges);
+	last_iova = ranges[nranges - 1].last;
+	free(ranges);
+
+	/* Base of the page that contains the last IOVA of the final range */
+	region->iova = last_iova & ~(region_size - 1);
+	region->size = region_size;
+}
+
+FIXTURE_TEARDOWN(vfio_dma_map_limit_test)
+{
+	vfio_pci_device_cleanup(self->device);
+	ASSERT_EQ(munmap(self->region.vaddr, self->mmap_size), 0);
+}
+
+TEST_F(vfio_dma_map_limit_test, unmap_range)
+{
+	struct vfio_dma_region *region = &self->region;
+	u64 unmapped;
+	int rc;
+
+	vfio_pci_dma_map(self->device, region);
+	ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
+	/* Unmapping exactly this range must succeed and report its size. */
+	rc = __vfio_pci_dma_unmap(self->device, region, &unmapped);
+	ASSERT_EQ(rc, 0);
+	ASSERT_EQ(unmapped, region->size);
+}
+
+TEST_F(vfio_dma_map_limit_test, unmap_all)
+{
+	struct vfio_dma_region *region = &self->region;
+	u64 unmapped;
+	int rc;
+
+	vfio_pci_dma_map(self->device, region);
+	ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
+	/* Unmap-all must succeed and report exactly the one mapped region. */
+	rc = __vfio_pci_dma_unmap_all(self->device, &unmapped);
+	ASSERT_EQ(rc, 0);
+	ASSERT_EQ(unmapped, region->size);
+}
+
+TEST_F(vfio_dma_map_limit_test, overflow)
+{
+	struct vfio_dma_region *region = &self->region;
+	int rc;
+
+	region->iova = ~(iova_t)0 & ~(region->size - 1);
+	region->size = self->mmap_size;
+	/* Two pages from the highest page-aligned iova_t value wrap around. */
+	rc = __vfio_pci_dma_map(self->device, region);
+	ASSERT_EQ(rc, -EOVERFLOW);
+
+	rc = __vfio_pci_dma_unmap(self->device, region, NULL);
+	ASSERT_EQ(rc, -EOVERFLOW);
+}
+
 int main(int argc, char *argv[])
 {
 	device_bdf = vfio_selftests_get_bdf(&argc, argv);
diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
index 2dbd70b7db62..f69eec8b928d 100644
--- a/tools/testing/selftests/vfio/vfio_pci_driver_test.c
+++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c
@@ -19,6 +19,7 @@ static const char *device_bdf;
 } while (0)
 
 static void region_setup(struct vfio_pci_device *device,
+			 struct iova_allocator *iova_allocator,
 			 struct vfio_dma_region *region, u64 size)
 {
 	const int flags = MAP_SHARED | MAP_ANONYMOUS;
@@ -29,7 +30,7 @@ static void region_setup(struct vfio_pci_device *device,
 	VFIO_ASSERT_NE(vaddr, MAP_FAILED);
 
 	region->vaddr = vaddr;
-	region->iova = (u64)vaddr;
+	region->iova = iova_allocator_alloc(iova_allocator, size);
 	region->size = size;
 
 	vfio_pci_dma_map(device, region);
@@ -44,6 +45,7 @@ static void region_teardown(struct vfio_pci_device *device,
 
 FIXTURE(vfio_pci_driver_test) {
 	struct vfio_pci_device *device;
+	struct iova_allocator *iova_allocator;
 	struct vfio_dma_region memcpy_region;
 	void *vaddr;
 	int msi_fd;
@@ -72,14 +74,15 @@ FIXTURE_SETUP(vfio_pci_driver_test)
 	struct vfio_pci_driver *driver;
 
 	self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode);
+	self->iova_allocator = iova_allocator_init(self->device);
 
 	driver = &self->device->driver;
 
-	region_setup(self->device, &self->memcpy_region, SZ_1G);
-	region_setup(self->device, &driver->region, SZ_2M);
+	region_setup(self->device, self->iova_allocator, &self->memcpy_region, SZ_1G);
+	region_setup(self->device, self->iova_allocator, &driver->region, SZ_2M);
 
 	/* Any IOVA that doesn't overlap memcpy_region and driver->region. */
-	self->unmapped_iova = 8UL * SZ_1G;
+	self->unmapped_iova = iova_allocator_alloc(self->iova_allocator, SZ_1G);
 
 	vfio_pci_driver_init(self->device);
 	self->msi_fd = self->device->msi_eventfds[driver->msi];
@@ -108,6 +111,7 @@ FIXTURE_TEARDOWN(vfio_pci_driver_test)
 	region_teardown(self->device, &self->memcpy_region);
 	region_teardown(self->device, &driver->region);
 
+	iova_allocator_cleanup(self->iova_allocator);
 	vfio_pci_device_cleanup(self->device);
 }
 
diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh
index edacebfc1632..c7b270dd77a9 100755
--- a/tools/testing/selftests/vsock/vmtest.sh
+++ b/tools/testing/selftests/vsock/vmtest.sh
@@ -7,6 +7,8 @@
 #		* virtme-ng
 #		* busybox-static (used by virtme-ng)
 #		* qemu	(used by virtme-ng)
+#
+# shellcheck disable=SC2317,SC2119
 
 readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
 readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../)
@@ -22,8 +24,9 @@ readonly SSH_HOST_PORT=2222
 readonly VSOCK_CID=1234
 readonly WAIT_PERIOD=3
 readonly WAIT_PERIOD_MAX=60
-readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX ))
-readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid)
+readonly WAIT_QEMU=5
+readonly PIDFILE_TEMPLATE=/tmp/vsock_vmtest_XXXX.pid
+declare -A PIDFILES
 
 # virtme-ng offers a netdev for ssh when using "--ssh", but we also need a
 # control port forwarded for vsock_test.  Because virtme-ng doesn't support
@@ -33,12 +36,6 @@ readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid)
 # add the kernel cmdline options that virtme-init uses to setup the interface.
 readonly QEMU_TEST_PORT_FWD="hostfwd=tcp::${TEST_HOST_PORT}-:${TEST_GUEST_PORT}"
 readonly QEMU_SSH_PORT_FWD="hostfwd=tcp::${SSH_HOST_PORT}-:${SSH_GUEST_PORT}"
-readonly QEMU_OPTS="\
-	 -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \
-	 -device virtio-net-pci,netdev=n0 \
-	 -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \
-	 --pidfile ${QEMU_PIDFILE} \
-"
 readonly KERNEL_CMDLINE="\
 	virtme.dhcp net.ifnames=0 biosdevname=0 \
 	virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \
@@ -51,6 +48,8 @@ readonly TEST_DESCS=(
 	"Run vsock_test using the loopback transport in the VM."
 )
 
+readonly USE_SHARED_VM=(vm_server_host_client vm_client_host_server vm_loopback)
+
 VERBOSE=0
 
 usage() {
@@ -84,21 +83,33 @@ die() {
 	exit "${KSFT_FAIL}"
 }
 
+check_result() {
+	# Print the TAP line for test $2 given its exit status $1 and update
+	# the global cnt_total and pass/skip/fail counters.
+	local rc="$1" arg="$2"
+
+	cnt_total=$(( cnt_total + 1 ))
+
+	if [[ ${rc} -eq ${KSFT_PASS} ]]; then
+		cnt_pass=$(( cnt_pass + 1 ))
+		echo "ok ${cnt_total} ${arg}"
+	elif [[ ${rc} -eq ${KSFT_SKIP} ]]; then
+		cnt_skip=$(( cnt_skip + 1 ))
+		echo "ok ${cnt_total} ${arg} # SKIP"
+	else
+		# Any other status is a failure, so no test goes unreported.
+		cnt_fail=$(( cnt_fail + 1 ))
+		echo "not ok ${cnt_total} ${arg} # exit=${rc}"
+	fi
+}
+
 vm_ssh() {
 	ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@"
 	return $?
 }
 
 cleanup() {
-	if [[ -s "${QEMU_PIDFILE}" ]]; then
-		pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1
-	fi
-
-	# If failure occurred during or before qemu start up, then we need
-	# to clean this up ourselves.
-	if [[ -e "${QEMU_PIDFILE}" ]]; then
-		rm "${QEMU_PIDFILE}"
-	fi
+	terminate_pidfiles "${!PIDFILES[@]}"
 }
 
 check_args() {
@@ -147,7 +158,7 @@ check_vng() {
 	local version
 	local ok
 
-	tested_versions=("1.33" "1.36")
+	tested_versions=("1.33" "1.36" "1.37")
 	version="$(vng --version)"
 
 	ok=0
@@ -188,10 +199,37 @@ handle_build() {
 	popd &>/dev/null
 }
 
+create_pidfile() {
+	local pidfile
+	# Create a unique pidfile, track it in PIDFILES and print its path.
+	pidfile=$(mktemp "${PIDFILE_TEMPLATE}")
+	PIDFILES["${pidfile}"]=1
+	# NOTE(review): under $(...) the PIDFILES update is lost to the caller.
+	echo "${pidfile}"
+}
+
+terminate_pidfiles() {
+	local pidfile
+	# Kill, delete and untrack every pidfile given as an argument.
+	for pidfile in "$@"; do
+		if [[ -s "${pidfile}" ]]; then
+			pkill -SIGTERM -F "${pidfile}" > /dev/null 2>&1
+		fi
+		# Remove the file even when it is empty (no PID was written).
+		if [[ -e "${pidfile}" ]]; then
+			rm -f "${pidfile}"
+		fi
+
+		unset "PIDFILES[${pidfile}]"
+	done
+}
+
 vm_start() {
+	local pidfile=$1
 	local logfile=/dev/null
 	local verbose_opt=""
 	local kernel_opt=""
+	local qemu_opts=""
 	local qemu
 
 	qemu=$(command -v "${QEMU}")
@@ -201,6 +239,13 @@ vm_start() {
 		logfile=/dev/stdout
 	fi
 
+	qemu_opts="\
+		 -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \
+		 -device virtio-net-pci,netdev=n0 \
+		 -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \
+		--pidfile ${pidfile}
+	"
+
 	if [[ "${BUILD}" -eq 1 ]]; then
 		kernel_opt="${KERNEL_CHECKOUT}"
 	fi
@@ -209,16 +254,14 @@ vm_start() {
 		--run \
 		${kernel_opt} \
 		${verbose_opt} \
-		--qemu-opts="${QEMU_OPTS}" \
+		--qemu-opts="${qemu_opts}" \
 		--qemu="${qemu}" \
 		--user root \
 		--append "${KERNEL_CMDLINE}" \
 		--rw  &> ${logfile} &
 
-	if ! timeout ${WAIT_TOTAL} \
-		bash -c 'while [[ ! -s '"${QEMU_PIDFILE}"' ]]; do sleep 1; done; exit 0'; then
-		die "failed to boot VM"
-	fi
+	timeout "${WAIT_QEMU}" \
+		bash -c 'while [[ ! -s '"${pidfile}"' ]]; do sleep 1; done; exit 0'
 }
 
 vm_wait_for_ssh() {
@@ -251,9 +294,11 @@ wait_for_listener()
 
 	# for tcp protocol additionally check the socket state
 	[ "${protocol}" = "tcp" ] && pattern="${pattern}0A"
+
 	for i in $(seq "${max_intervals}"); do
-		if awk '{print $2" "$4}' /proc/net/"${protocol}"* | \
-		   grep -q "${pattern}"; then
+		if awk -v pattern="${pattern}" \
+			'BEGIN {rc=1} $2" "$4 ~ pattern {rc=0} END {exit rc}' \
+			/proc/net/"${protocol}"*; then
 			break
 		fi
 		sleep "${interval}"
@@ -270,113 +315,196 @@ EOF
 }
 
 host_wait_for_listener() {
-	wait_for_listener "${TEST_HOST_PORT_LISTENER}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}"
+	local port=$1
+
+	wait_for_listener "${port}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}"
 }
 
-__log_stdin() {
-	cat | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+vm_vsock_test() {
+	local host=$1
+	local cid=$2
+	local port=$3
+	local rc
+	# Run vsock_test in the VM as client of host $1, or as server if $1 is "server".
+	# log output and use pipefail to respect vsock_test errors
+	set -o pipefail
+	if [[ "${host}" != server ]]; then
+		vm_ssh -- "${VSOCK_TEST}" \
+			--mode=client \
+			--control-host="${host}" \
+			--peer-cid="${cid}" \
+			--control-port="${port}" \
+			2>&1 | log_guest
+		rc=$?
+	else
+		vm_ssh -- "${VSOCK_TEST}" \
+			--mode=server \
+			--peer-cid="${cid}" \
+			--control-port="${port}" \
+			2>&1 | log_guest &
+
+		# The server pipeline above runs in the background.  "$?"
+		# after "&" is always 0 (it only says the job was started),
+		# so vsock_test's own exit status is not available here
+		# and must not be tested.  The status returned for server
+		# mode is the result of waiting for the control port
+		# listener below.
+		vm_wait_for_listener "${port}"
+		rc=$?
+	fi
+	set +o pipefail
+
+	return $rc
 }
 
-__log_args() {
-	echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }'
+host_vsock_test() {
+	local host=$1
+	local cid=$2
+	local port=$3
+	local rc
+	# Run vsock_test on the host as client of host $1, or as server if $1 is "server".
+	# log output and use pipefail to respect vsock_test errors
+	set -o pipefail
+	if [[ "${host}" != server ]]; then
+		${VSOCK_TEST} \
+			--mode=client \
+			--peer-cid="${cid}" \
+			--control-host="${host}" \
+			--control-port="${port}" 2>&1 | log_host
+		rc=$?
+	else
+		${VSOCK_TEST} \
+			--mode=server \
+			--peer-cid="${cid}" \
+			--control-port="${port}" 2>&1 | log_host &
+
+		# The server pipeline above runs in the background.  "$?"
+		# after "&" is always 0 (it only says the job was started),
+		# so vsock_test's own exit status is not available here
+		# and must not be tested.  The status returned for server
+		# mode is the result of waiting for the control port
+		# listener below.
+		host_wait_for_listener "${port}"
+		rc=$?
+	fi
+	set +o pipefail
+
+	return $rc
 }
 
 log() {
-	local prefix="$1"
+	local redirect
+	local prefix
 
-	shift
-	local redirect=
 	if [[ ${VERBOSE} -eq 0 ]]; then
 		redirect=/dev/null
 	else
 		redirect=/dev/stdout
 	fi
 
+	prefix="${LOG_PREFIX:-}"
+
 	if [[ "$#" -eq 0 ]]; then
-		__log_stdin | tee -a "${LOG}" > ${redirect}
+		if [[ -n "${prefix}" ]]; then
+			awk -v prefix="${prefix}" '{printf "%s: %s\n", prefix, $0}'
+		else
+			cat
+		fi
 	else
-		__log_args "$@" | tee -a "${LOG}" > ${redirect}
-	fi
-}
-
-log_setup() {
-	log "setup" "$@"
+		if [[ -n "${prefix}" ]]; then
+			echo "${prefix}: " "$@"
+		else
+			echo "$@"
+		fi
+	fi | tee -a "${LOG}" > "${redirect}"
 }
 
 log_host() {
-	local testname=$1
-
-	shift
-	log "test:${testname}:host" "$@"
+	LOG_PREFIX=host log "$@"
 }
 
 log_guest() {
-	local testname=$1
-
-	shift
-	log "test:${testname}:guest" "$@"
+	LOG_PREFIX=guest log "$@"
 }
 
 test_vm_server_host_client() {
-	local testname="${FUNCNAME[0]#test_}"
+	if ! vm_vsock_test "server" 2 "${TEST_GUEST_PORT}"; then
+		return "${KSFT_FAIL}"
+	fi
 
-	vm_ssh -- "${VSOCK_TEST}" \
-		--mode=server \
-		--control-port="${TEST_GUEST_PORT}" \
-		--peer-cid=2 \
-		2>&1 | log_guest "${testname}" &
+	if ! host_vsock_test "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}"; then
+		return "${KSFT_FAIL}"
+	fi
 
-	vm_wait_for_listener "${TEST_GUEST_PORT}"
+	return "${KSFT_PASS}"
+}
 
-	${VSOCK_TEST} \
-		--mode=client \
-		--control-host=127.0.0.1 \
-		--peer-cid="${VSOCK_CID}" \
-		--control-port="${TEST_HOST_PORT}" 2>&1 | log_host "${testname}"
+test_vm_client_host_server() {
+	if ! host_vsock_test "server" "${VSOCK_CID}" "${TEST_HOST_PORT_LISTENER}"; then
+		return "${KSFT_FAIL}"
+	fi
 
-	return $?
+	if ! vm_vsock_test "10.0.2.2" 2 "${TEST_HOST_PORT_LISTENER}"; then
+		return "${KSFT_FAIL}"
+	fi
+
+	return "${KSFT_PASS}"
 }
 
-test_vm_client_host_server() {
-	local testname="${FUNCNAME[0]#test_}"
+test_vm_loopback() {
+	local port=60000 # non-forwarded local port
 
-	${VSOCK_TEST} \
-		--mode "server" \
-		--control-port "${TEST_HOST_PORT_LISTENER}" \
-		--peer-cid "${VSOCK_CID}" 2>&1 | log_host "${testname}" &
+	vm_ssh -- modprobe vsock_loopback &> /dev/null || :
 
-	host_wait_for_listener
+	if ! vm_vsock_test "server" 1 "${port}"; then
+		return "${KSFT_FAIL}"
+	fi
 
-	vm_ssh -- "${VSOCK_TEST}" \
-		--mode=client \
-		--control-host=10.0.2.2 \
-		--peer-cid=2 \
-		--control-port="${TEST_HOST_PORT_LISTENER}" 2>&1 | log_guest "${testname}"
+	if ! vm_vsock_test "127.0.0.1" 1 "${port}"; then
+		return "${KSFT_FAIL}"
+	fi
 
-	return $?
+	return "${KSFT_PASS}"
 }
 
-test_vm_loopback() {
-	local testname="${FUNCNAME[0]#test_}"
-	local port=60000 # non-forwarded local port
+shared_vm_test() {
+	local tname
+
+	tname="${1}"
+
+	for testname in "${USE_SHARED_VM[@]}"; do
+		if [[ "${tname}" == "${testname}" ]]; then
+			return 0
+		fi
+	done
 
-	vm_ssh -- "${VSOCK_TEST}" \
-		--mode=server \
-		--control-port="${port}" \
-		--peer-cid=1 2>&1 | log_guest "${testname}" &
+	return 1
+}
 
-	vm_wait_for_listener "${port}"
+shared_vm_tests_requested() {
+	for arg in "$@"; do
+		if shared_vm_test "${arg}"; then
+			return 0
+		fi
+	done
 
-	vm_ssh -- "${VSOCK_TEST}" \
-		--mode=client \
-		--control-host="127.0.0.1" \
-		--control-port="${port}" \
-		--peer-cid=1 2>&1 | log_guest "${testname}"
+	return 1
+}
 
-	return $?
+run_shared_vm_tests() {
+	local arg
+
+	for arg in "$@"; do
+		if ! shared_vm_test "${arg}"; then
+			continue
+		fi
+
+		run_shared_vm_test "${arg}"
+		check_result "$?" "${arg}"
+	done
 }
 
-run_test() {
+run_shared_vm_test() {
 	local host_oops_cnt_before
 	local host_warn_cnt_before
 	local vm_oops_cnt_before
@@ -389,9 +517,9 @@ run_test() {
 	local rc
 
 	host_oops_cnt_before=$(dmesg | grep -c -i 'Oops')
-	host_warn_cnt_before=$(dmesg --level=warn | wc -l)
+	host_warn_cnt_before=$(dmesg --level=warn | grep -c -i 'vsock')
 	vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops')
-	vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l)
+	vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock')
 
 	name=$(echo "${1}" | awk '{ print $1 }')
 	eval test_"${name}"
@@ -399,31 +527,32 @@ run_test() {
 
 	host_oops_cnt_after=$(dmesg | grep -i 'Oops' | wc -l)
 	if [[ ${host_oops_cnt_after} -gt ${host_oops_cnt_before} ]]; then
-		echo "FAIL: kernel oops detected on host" | log_host "${name}"
+		echo "FAIL: kernel oops detected on host" | log_host
 		rc=$KSFT_FAIL
 	fi
 
-	host_warn_cnt_after=$(dmesg --level=warn | wc -l)
+	host_warn_cnt_after=$(dmesg --level=warn | grep -c -i 'vsock')
 	if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then
-		echo "FAIL: kernel warning detected on host" | log_host "${name}"
+		echo "FAIL: kernel warning detected on host" | log_host
 		rc=$KSFT_FAIL
 	fi
 
 	vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l)
 	if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then
-		echo "FAIL: kernel oops detected on vm" | log_host "${name}"
+		echo "FAIL: kernel oops detected on vm" | log_host
 		rc=$KSFT_FAIL
 	fi
 
-	vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l)
+	vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock')
 	if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then
-		echo "FAIL: kernel warning detected on vm" | log_host "${name}"
+		echo "FAIL: kernel warning detected on vm" | log_host
 		rc=$KSFT_FAIL
 	fi
 
 	return "${rc}"
 }
 
+BUILD=0
 QEMU="qemu-system-$(uname -m)"
 
 while getopts :hvsq:b o
@@ -452,30 +581,24 @@ handle_build
 
 echo "1..${#ARGS[@]}"
 
-log_setup "Booting up VM"
-vm_start
-vm_wait_for_ssh
-log_setup "VM booted up"
-
 cnt_pass=0
 cnt_fail=0
 cnt_skip=0
 cnt_total=0
-for arg in "${ARGS[@]}"; do
-	run_test "${arg}"
-	rc=$?
-	if [[ ${rc} -eq $KSFT_PASS ]]; then
-		cnt_pass=$(( cnt_pass + 1 ))
-		echo "ok ${cnt_total} ${arg}"
-	elif [[ ${rc} -eq $KSFT_SKIP ]]; then
-		cnt_skip=$(( cnt_skip + 1 ))
-		echo "ok ${cnt_total} ${arg} # SKIP"
-	elif [[ ${rc} -eq $KSFT_FAIL ]]; then
-		cnt_fail=$(( cnt_fail + 1 ))
-		echo "not ok ${cnt_total} ${arg} # exit=$rc"
-	fi
-	cnt_total=$(( cnt_total + 1 ))
-done
+
+if shared_vm_tests_requested "${ARGS[@]}"; then
+	log_host "Booting up VM"
+	pidfile="$(create_pidfile)"
+	# create_pidfile ran in a command-substitution subshell, so its update
+	# of PIDFILES never reached this shell; register it for cleanup().
+	PIDFILES["${pidfile}"]=1
+	vm_start "${pidfile}"
+	vm_wait_for_ssh
+	log_host "VM booted up"
+
+	run_shared_vm_tests "${ARGS[@]}"
+	terminate_pidfiles "${pidfile}"
+fi
 
 echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}"
 echo "Log: ${LOG}"
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 05e1e6774fba..918eaec8bfbe 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -308,12 +308,13 @@ static void test_getcpu(int cpu)
 #ifdef __x86_64__
 
 static jmp_buf jmpbuf;
-static volatile unsigned long segv_err;
+static volatile unsigned long segv_err, segv_trapno;
 
 static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
 {
 	ucontext_t *ctx = (ucontext_t *)ctx_void;
 
+	segv_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
 	segv_err =  ctx->uc_mcontext.gregs[REG_ERR];
 	siglongjmp(jmpbuf, 1);
 }
@@ -336,7 +337,8 @@ static void test_vsys_r(void)
 	else if (can_read)
 		ksft_test_result_pass("We have read access\n");
 	else
-		ksft_test_result_pass("We do not have read access: #PF(0x%lx)\n", segv_err);
+		ksft_test_result_pass("We do not have read access (trap=%ld, error=0x%lx)\n",
+				      segv_trapno, segv_err);
 }
 
 static void test_vsys_x(void)
@@ -347,7 +349,7 @@ static void test_vsys_x(void)
 		return;
 	}
 
-	ksft_print_msg("Make sure that vsyscalls really page fault\n");
+	ksft_print_msg("Make sure that vsyscalls really cause a fault\n");
 
 	bool can_exec;
 	if (sigsetjmp(jmpbuf, 1) == 0) {
@@ -358,13 +360,14 @@ static void test_vsys_x(void)
 	}
 
 	if (can_exec)
-		ksft_test_result_fail("Executing the vsyscall did not page fault\n");
-	else if (segv_err & (1 << 4)) /* INSTR */
-		ksft_test_result_pass("Executing the vsyscall page failed: #PF(0x%lx)\n",
-				      segv_err);
+		ksft_test_result_fail("Executing the vsyscall did not fault\n");
+	/* #GP or #PF (with X86_PF_INSTR) */
+	else if ((segv_trapno == 13) || ((segv_trapno == 14) && (segv_err & (1 << 4))))
+		ksft_test_result_pass("Executing the vsyscall page failed (trap=%ld, error=0x%lx)\n",
+				      segv_trapno, segv_err);
 	else
-		ksft_test_result_fail("Execution failed with the wrong error: #PF(0x%lx)\n",
-				      segv_err);
+		ksft_test_result_fail("Execution failed with the wrong error (trap=%ld, error=0x%lx)\n",
+				      segv_trapno, segv_err);
 }
 
 /*
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index d4517386e551..9e1250790f33 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -2015,6 +2015,11 @@ static void test_stream_transport_change_client(const struct test_opts *opts)
 			exit(EXIT_FAILURE);
 		}
 
+		/* Although setting SO_LINGER does not affect the original test
+		 * for null-ptr-deref, it may trigger a lockdep warning.
+		 */
+		enable_so_linger(s, 1);
+
 		ret = connect(s, (struct sockaddr *)&sa, sizeof(sa));
 		/* The connect can fail due to signals coming from the thread,
 		 * or because the receiver connection queue is full.
@@ -2352,7 +2357,7 @@ static struct test_case test_cases[] = {
 		.run_server = test_stream_nolinger_server,
 	},
 	{
-		.name = "SOCK_STREAM transport change null-ptr-deref",
+		.name = "SOCK_STREAM transport change null-ptr-deref, lockdep warn",
 		.run_client = test_stream_transport_change_client,
 		.run_server = test_stream_transport_change_server,
 	},
diff --git a/tools/thermal/thermal-engine/thermal-engine.c b/tools/thermal/thermal-engine/thermal-engine.c
index 0764dc754771..66b0ba1fcd23 100644
--- a/tools/thermal/thermal-engine/thermal-engine.c
+++ b/tools/thermal/thermal-engine/thermal-engine.c
@@ -374,7 +374,7 @@ int main(int argc, char *argv[])
 	}
 
 	if (options.daemonize && daemon(0, 0)) {
-		ERROR("Failed to daemonize: %p\n");
+		ERROR("Failed to daemonize: %m\n");
 		return THERMAL_ENGINE_DAEMON_ERROR;
 	}
 
diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c
index cf263fe9deaf..ef97916e3873 100644
--- a/tools/tracing/latency/latency-collector.c
+++ b/tools/tracing/latency/latency-collector.c
@@ -1725,7 +1725,7 @@ static void show_usage(void)
 "-n, --notrace\t\tIf latency is detected, do not print out the content of\n"
 "\t\t\tthe trace file to standard output\n\n"
 
-"-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n"
+"-e, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n"
 
 "-r, --random\t\tArbitrarily sleep a certain amount of time, default\n"
 "\t\t\t%ld ms, before reading the trace file. The\n"